hpcc-1.4.1/DGEMM/onecpu.c0000644000000000000000000000551111256503657011623 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ #include int HPCC_StarDGEMM(HPCC_Params *params) { int myRank, commSize; double localGflops, minGflops, maxGflops, avgGflops; int n; int rv, errCount, failure, failureAll; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; localGflops = minGflops = maxGflops = avgGflops = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); rv = HPCC_TestDGEMM( params, 0 == myRank ? 1 : 0, &localGflops, &n, &failure ); params->DGEMM_N = n; MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); if (failureAll) params->Failure = 1; MPI_Reduce( &localGflops, &minGflops, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &localGflops, &avgGflops, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &localGflops, &maxGflops, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); avgGflops /= (double)commSize; MPI_Bcast( &avgGflops, 1, MPI_DOUBLE, 0, comm ); params->StarDGEMMGflops = avgGflops; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Minimum Gflop/s %.6f\n", minGflops ); fprintf( outputFile, "Average Gflop/s %.6f\n", avgGflops ); fprintf( outputFile, "Maximum Gflop/s %.6f\n", maxGflops ); END_IO( myRank, outputFile ); return 0; } int HPCC_SingleDGEMM(HPCC_Params *params) { int myRank, commSize; int rv, errCount, rank, failure = 0; double localGflops; int n; double scl = 1.0 / RAND_MAX; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; localGflops = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); srand(time(NULL)); scl *= (double)commSize; /* select a node at random, but not node 0 (unless there is just one node) */ if (1 == commSize) rank = 0; else for (rank = 0; ; rank = (int)(scl * rand())) { if (rank > 0 && rank < commSize) 
break; } MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ if (myRank == rank) /* if this node has been selected */ rv = HPCC_TestDGEMM( params, 0 == myRank ? 1 : 0, &localGflops, &n, &failure ); MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ errCount = rv; if (failure) params->Failure = 1; /* broadcast result */ MPI_Bcast( &localGflops, 1, MPI_DOUBLE, rank, comm ); params->SingleDGEMMGflops = localGflops; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Node selected %d\n", rank ); fprintf( outputFile, "Single DGEMM Gflop/s %.6f\n", localGflops ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/DGEMM/tstdgemm.c0000644000000000000000000000663511256503657012166 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* tstdgemm.c */ #include /* Generates random matrix with entries between 0.0 and 1.0 */ static void dmatgen(int m, int n, double *a, int lda, int seed) { int i, j; double *a0 = a, rcp = 1.0 / RAND_MAX; srand( seed ); for (j = 0; j < n; j++) { for (i = 0; i < m; i++) a0[i] = rcp * rand(); a0 += lda; } } static double dnrm_inf(int m, int n, double *a, int lda) { int i, j, k, lnx; double mx, *a0; int nx = 10; double x[10]; mx = 0.0; for (i = 0; i < m; i += nx) { lnx = Mmin( nx, m-i ); for (k = 0; k < lnx; ++k) x[k] = 0.0; a0 = a + i; for (j = 0; j < n; ++j) { for (k = 0; k < lnx; ++k) x[k] += fabs( a0[k] ); a0 += lda; } for (k = 0; k < lnx; ++k) if (mx < x[k]) mx = x[k]; } return mx; } int HPCC_TestDGEMM(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure) { int n, lda, ldb, ldc, failure = 1; double *a, *b, *c, *x, *y, *z, alpha, beta, sres, cnrm, xnrm; double Gflops = 0.0, dn, t0, t1; long l_n; FILE *outFile; int seed_a, seed_b, seed_c, seed_x; if (doIO) { 
outFile = fopen( params->outFname, "a" ); if (! outFile) { outFile = stderr; fprintf( outFile, "Cannot open output file.\n" ); return 1; } } n = (int)(sqrt( params->HPLMaxProcMem / sizeof(double) / 3 + 0.25 ) - 0.5); if (n < 0) n = -n; /* if 'n' has overflown an integer */ l_n = n; lda = ldb = ldc = n; a = HPCC_XMALLOC( double, l_n * l_n ); b = HPCC_XMALLOC( double, l_n * l_n ); c = HPCC_XMALLOC( double, l_n * l_n ); x = HPCC_XMALLOC( double, l_n ); y = HPCC_XMALLOC( double, l_n ); z = HPCC_XMALLOC( double, l_n ); if (! a || ! b || ! c || ! x || ! y || ! z) { goto comp_end; } seed_a = (int)time( NULL ); dmatgen( n, n, a, n, seed_a ); seed_b = (int)time( NULL ); dmatgen( n, n, b, n, seed_b ); seed_c = (int)time( NULL ); dmatgen( n, n, c, n, seed_c ); seed_x = (int)time( NULL ); dmatgen( n, 1, x, n, seed_x ); alpha = a[n / 2]; beta = b[n / 2]; t0 = MPI_Wtime(); HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, n, n, alpha, a, n, b, n, beta, c, n ); t1 = MPI_Wtime(); t1 -= t0; dn = (double)n; if (t1 != 0.0 && t1 != -0.0) Gflops = 2.0e-9 * dn * dn * dn / t1; else Gflops = 0.0; cnrm = dnrm_inf( n, n, c, n ); xnrm = dnrm_inf( n, 1, x, n ); /* y <- c*x */ HPL_dgemv( HplColumnMajor, HplNoTrans, n, n, 1.0, c, ldc, x, 1, 0.0, y, 1 ); /* z <- b*x */ HPL_dgemv( HplColumnMajor, HplNoTrans, n, n, 1.0, b, ldb, x, 1, 0.0, z, 1 ); /* y <- alpha * a * z - y */ HPL_dgemv( HplColumnMajor, HplNoTrans, n, n, alpha, a, lda, z, 1, -1.0, y, 1 ); dmatgen( n, n, c, n, seed_c ); /* y <- beta * c_orig * x + y */ HPL_dgemv( HplColumnMajor, HplNoTrans, n, n, beta, c, ldc, x, 1, 1.0, y, 1 ); sres = dnrm_inf( n, 1, y, n ) / cnrm / xnrm / n / HPL_dlamch( HPL_MACH_EPS ); if (doIO) fprintf( outFile, "Scaled residual: %g\n", sres ); if (sres < params->test.thrsh) failure = 0; comp_end: if (z) HPCC_free( z ); if (y) HPCC_free( y ); if (x) HPCC_free( x ); if (c) HPCC_free( c ); if (b) HPCC_free( b ); if (a) HPCC_free( a ); if (doIO) { fflush( outFile ); fclose( outFile ); } if (UGflops) *UGflops = 
Gflops; if (Un) *Un = n; if (Ufailure) *Ufailure = failure; return 0; } hpcc-1.4.1/FFT/bcnrand.c0000644000000000000000000001427011256503657011531 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* bcnrand.c This is a version of bcnrand.f written using 64-bit integers (on 64-bit systems it's 'long', on 32-bit systems it's 'long long'). */ /* ! This routine generates a sequence of IEEE 64-bit floating-point pseudorandom ! numbers in the range (0, 1), based on the recently discovered class of ! normal numbers described in the paper "Random Generators and Normal Numbers" ! by DHB and Richard Crandall, Experimental Mathematics, vol 11, no 4 (2002), ! available at http://crd.lbl.gov/~dhbailey/dhbpapers/bcnormal-em.pdf. ! The sequence generated consists of consecutive 53-bit sections of the binary ! expansion of alpha_{2,3} = sum_k 1/((3^k)*2^(3^k))), which is known to be ! 2-normal, normalized by 2^53 so as to be uniform in (0,1). The parameter a ! is the starting index, or in other words the seed of the pseudorandom ! sequence. To obtain the maximum period, a should be set to at least ! 3^33 + 100 and must not exceed 2^53. When a is set in this range, the period ! of the sequence generated by this code is 2x3^32 = 3.706e15. Also, the ! sequence will not necessarily match the binary digits of alpha_{2,3} if more ! than this many elements are generated. ! The bcnrand routine facilitates simple parallelization. For example, in ! an MPI program, suppose that kpr is the processor number and npr is the ! number of processors. Then the line ! call bcnrand (n/npr, a + 53*n/npr*kpr, x) ! generates on each processor a section of length n/npr. In this way, the ! npr processors collectively have the same n-long sequence (provided that ! n is divisible by npr) as is generated on a single processor system by ! means of the line ! call bcnrand (n, a, x) ! 
This code is designed for systems with IEEE 64-bit floating-point arithmetic.
! On IBM systems and others with a fused multiply-add instruction, see the
! !> comments below for changes that will improve performance. On IBM systems,
! use the compiler flag -qstrict.
! David H. Bailey 2004-05-12
*/

#include "hpccfft.h"

/* Two-word unsigned integer: element [0] is the low word, [1] the high word
   (see how ddmuldd() stores its product). */
typedef u64Int_t Big[2];

/* r = a * b
   Full double-width product of two words, built from four 32x32-bit partial
   products; the low word goes to r[0] and the high word to r[1]. */
static void ddmuldd(u64Int_t a, u64Int_t b, Big r) {
  u64Int_t a0, a1, b0, b1, hb, acc, acc1;

  /* 'hb' should be 0xFFFFFFFF (first 32-bits set to one) */
  hb = 65535L;
  hb = (hb << 16) | hb;

  /* split 'a' and 'b' into two 32-bit quantities */
  a0 = a & hb;
  a1 = (a >> 32) & hb;
  b0 = b & hb;
  b1 = (b >> 32) & hb;

  /* low partial product: its lower half is final, its upper half carries */
  acc = a0 * b0;
  r[0] = acc & hb;
  acc >>= 32;
  /* first cross term; the overflow into the high word is parked in acc1 */
  acc += a1 * b0;
  acc1 = acc >> 32;
  acc &= hb;
  /* second cross term completes the upper half of the low word */
  acc += a0 * b1;
  r[0] += (acc & hb) << 32;
  acc >>= 32;
  /* high word: both carries plus the high partial product */
  acc += acc1;
  acc += a1 * b1;
  r[1] = acc;
}

/* r = a - b
   Two-word subtraction; a borrow is taken from the high word when the low
   word of a is smaller than the low word of b. */
static void ddsub(Big a, Big b, Big r) {
  u64Int_t mx = 0;
  mx = ~mx; /* all bits set */

  r[1] = a[1] - b[1];
  if (a[0] >= b[0])
    r[0] = a[0] - b[0];
  else {
    r[1] -= 1;
    r[0] = mx - b[0] + 1 + a[0];
  }
}

/* q = d / v; remainder Ur
   Divides the two-word value d by the single word v.  The quotient is stored
   in q and, when Ur is not NULL, the remainder in *Ur.
   NOTE(review): the shift constants (32, 63) presume u64Int_t is 64 bits
   wide -- its definition is not in this file. */
static void dddiv(Big d, u64Int_t v, Big q, u64Int_t *Ur) {
  u64Int_t r1, r0, v1, v0, msb = 1, mx = 0, one = 1;
  int i;

  msb <<= 63;
  mx = ~mx;

  q[0] = q[1] = 0;

  /* high word first: afterwards the running remainder (r1,r0) has r1 < v */
  if (v <= d[1]) {
    q[1] = d[1] / v;
    r1 = d[1] % v;
  } else {
    r1 = d[1];
    q[1] = 0;
  }
  r0 = d[0];

  /* while the remainder still occupies the high word, subtract shifted
     copies of v and add the matching power of two to the low quotient word */
  while (r1) {
    v1 = 0;
    v0 = v;
    /* shift (v1,v0) left until its high word exceeds r1 */
    for (i = 0; v1 <= r1; i++) {
      v1 <<= 1;
      if (msb & v0) v1 |= 1;
      v0 <<= 1;
    }
    /* step back at least once, and further while the shifted divisor is
       still larger than the remainder */
    do {
      i--;
      v0 >>= 1;
      v0 &= mx;
      if (1 & v1) v0 |= msb;
      v1 >>= 1;
    } while (v1 == r1 && v0 > r0); /* make sure (v1,v0) is not too big */

    q[0] += one << i;
    r1 -= v1;
    if (r0 >= v0)
      r0 -= v0;
    else {
      /* borrow from the high word */
      r0 += mx - v0 + 1;
      r1 -= 1;
    }
  }

  /* the remainder now fits in one word: finish with a native division */
  q[0] += r0 / v;
  r0 %= v;

  if (Ur) *Ur = r0;
}

/*
! expm2 = 2^p mod am. p2 is a table with powers of 2, i.e., p2(i) = 2^i.
! This routine uses a left-to-right binary exponentiation scheme.
*/ static u64Int_t expm2(u64Int_t p, u64Int_t am) { u64Int_t p2, p1, pt1, r; Big ddm, dd1, dd2; int i; for (p2 = i = 1; i < 54; i++) { p2 <<= 1; if (p2 > p) break; } p1 = p; pt1 = p2 >> 1; r = 1; ddm[0] = am; ddm[1] = 0; while (1) { if (p1 >= pt1) { /* r = mod(2.0 * r, am) */ ddmuldd( 2, r, dd1 ); if (dd1[0] > am) { ddsub(dd1, ddm, dd2); dd1[0] = dd2[0]; dd1[1] = dd2[1]; } r = dd1[0]; p1 = p1 - pt1; } pt1 /= 2; if (pt1 >= 1) { /* r = mod(r * r, am) */ ddmuldd( r, r, dd1 ); dddiv( dd1, am, dd2, &r ); continue; } break; } return r; } /* Let minA = 3^33 + 100 If `a' is smaller than `minA' then `a' is incremented by `minA' this value. In this way, you can seed the generator with small integers and the requirements will be fullfilled internally. */ int HPCC_bcnrand(u64Int_t n, u64Int_t a, void *vx) { u64Int_t d1, d2, d3, t53, p3i, ui, minA; s64Int_t sd1, sp3i; Big dd1, dd2; int i; double rcp, two64, v, *x = (double *)vx; /* minA = 3.d0 ** 33 + 100.d0 */ minA = 20709114; minA <<= 28; minA += 106609639; /* make sure `a' is big enough */ if (a < minA) a += minA; t53 = 1; t53 <<= 53; d1 = 1; for (i = 0; i < 53; i++) { d1 *= 3; if (d1 > a) break; } /* two64 = 2 ** 64 */ two64 = 2.0; for (i = 0; i < 6; i++) two64 *= two64; p3i = d1 / 3; sp3i = (s64Int_t)p3i; rcp = 1.0 / p3i; /* ! Calculate starting element. This code performs the following: ! d1 = [int[p3i/2] * 2^(a-p3i)] mod p3i. 
*/ /* d1 = (p3i/2 * (2 ** (a-p3i))) % p3i */ d2 = expm2( a - p3i, p3i ); d3 = p3i / 2; ddmuldd( d2, d3, dd1 ); dddiv( dd1, p3i, dd2, &d1 ); x[0] = d1 * rcp; for (ui = 1; ui < n; ui++) { /* dd1 = d1 * t53 */ dd1[1] = (d1 >> 11); dd1[0] = (d1 << 53); /* Approximate `dd1/p3i' (the result should be off by 1) */ v = ((two64 * (double)dd1[1]) + (double)dd1[0]) * rcp; ddmuldd( (u64Int_t)v, p3i, dd2 ); /* The value of `dd1-dd2' should between `-p3i' and 'p3i', hence upper halves of `dd1' and `dd2' can be ignored */ sd1 = (s64Int_t)(dd1[0] - dd2[0]); /* Check the above approximation */ if (sd1 < 0) sd1 += sp3i; if (sd1 > sp3i) sd1 -= sp3i; /* d1 = (d1 * t53) % p3i */ d1 = (u64Int_t)sd1; x[ui] = d1 * rcp; } return 0; } hpcc-1.4.1/FFT/fft235.c0000644000000000000000000007151111256503657011134 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* C C FFTE: A FAST FOURIER TRANSFORM PACKAGE C C (C) COPYRIGHT SOFTWARE, 2000-2004, ALL RIGHTS RESERVED C BY C DAISUKE TAKAHASHI C GRADUATE SCHOOL OF SYSTEMS AND INFORMATION ENGINEERING C UNIVERSITY OF TSUKUBA C 1-1-1 TENNODAI, TSUKUBA, IBARAKI 305-8573, JAPAN C E-MAIL: daisuke@cs.tsukuba.ac.jp C C C RADIX-2, 3, 4, 5 AND 8 FFT ROUTINE C C FORTRAN77 SOURCE PROGRAM C C WRITTEN BY DAISUKE TAKAHASHI C */ #include "hpccfft.h" static void fft2(fftw_complex *a, fftw_complex *b, int m) { int i, lda, ldb; double x0, x1, y0, y1; lda = m; ldb = m; for (i = 0; i < m; ++i) { x0 = c_re( ARR2D( a, i, 0, lda ) ); y0 = c_im( ARR2D( a, i, 0, lda ) ); x1 = c_re( ARR2D( a, i, 1, lda ) ); y1 = c_im( ARR2D( a, i, 1, lda ) ); c_re( ARR2D( b, i, 0, ldb ) ) = x0 + x1; c_im( ARR2D( b, i, 0, ldb ) ) = y0 + y1; c_re( ARR2D( b, i, 1, ldb ) ) = x0 - x1; c_im( ARR2D( b, i, 1, ldb ) ) = y0 - y1; } } static void fft4a(fftw_complex *a, fftw_complex *b, fftw_complex *w, int l) { int j, lda, ldb; double wr1, wr2, wr3, wi1, wi2, wi3; double x0, x1, x2, x3, y0, y1, y2, y3; lda = l; ldb = 4; for (j = 0; j < l; 
++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2 = wr1*wr1 - wi1*wi1; wi2 = wr1*wi1 + wr1*wi1; wr3 = wr1*wr2 - wi1*wi2; wi3 = wr1*wi2 + wi1*wr2; x0 = c_re( ARR2D( a, j, 0, lda ) ) + c_re( ARR2D( a, j, 2, lda ) ); y0 = c_im( ARR2D( a, j, 0, lda ) ) + c_im( ARR2D( a, j, 2, lda ) ); x1 = c_re( ARR2D( a, j, 0, lda ) ) - c_re( ARR2D( a, j, 2, lda ) ); y1 = c_im( ARR2D( a, j, 0, lda ) ) - c_im( ARR2D( a, j, 2, lda ) ); x2 = c_re( ARR2D( a, j, 1, lda ) ) + c_re( ARR2D( a, j, 3, lda ) ); y2 = c_im( ARR2D( a, j, 1, lda ) ) + c_im( ARR2D( a, j, 3, lda ) ); x3 = c_im( ARR2D( a, j, 1, lda ) ) - c_im( ARR2D( a, j, 3, lda ) ); y3 = c_re( ARR2D( a, j, 3, lda ) ) - c_re( ARR2D( a, j, 1, lda ) ); c_re( ARR2D( b, 0, j, ldb ) ) = x0 + x2; c_im( ARR2D( b, 0, j, ldb ) ) = y0 + y2; c_re( ARR2D( b, 2, j, ldb ) ) = wr2 * (x0-x2) - wi2 * (y0-y2); c_im( ARR2D( b, 2, j, ldb ) ) = wr2 * (y0-y2) + wi2 * (x0-x2); c_re( ARR2D( b, 1, j, ldb ) ) = wr1 * (x1+x3) - wi1 * (y1+y3); c_im( ARR2D( b, 1, j, ldb ) ) = wr1 * (y1+y3) + wi1 * (x1+x3); c_re( ARR2D( b, 3, j, ldb ) ) = wr3 * (x1-x3) - wi3 * (y1-y3); c_im( ARR2D( b, 3, j, ldb ) ) = wr3 * (y1-y3) + wi3 * (x1-x3); } } static void fft4b(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { int i, j, lda1, lda2, ldb1, ldb2; double x0, x1, x2, x3, y0, y1, y2, y3; double wr1, wr2, wr3, wi1, wi2, wi3; lda1 = m; lda2 = l; ldb1 = m; ldb2 = 4; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, 0, 0, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 2, lda1, lda2 ) ); y0 = c_im( ARR3D( a, i, 0, 0, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 2, lda1, lda2 ) ); x1 = c_re( ARR3D( a, i, 0, 0, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 2, lda1, lda2 ) ); y1 = c_im( ARR3D( a, i, 0, 0, lda1, lda2 ) ) - c_im( ARR3D( a, i, 0, 2, lda1, lda2 ) ); x2 = c_re( ARR3D( a, i, 0, 1, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 3, lda1, lda2 ) ); y2 = c_im( ARR3D( a, i, 0, 1, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 3, lda1, lda2 ) ); x3 = c_im( ARR3D( a, i, 0, 1, lda1, lda2 ) ) - c_im( 
ARR3D( a, i, 0, 3, lda1, lda2 ) ); y3 = c_re( ARR3D( a, i, 0, 3, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 1, lda1, lda2 ) ); c_re( ARR3D( b, i, 0, 0, ldb1, ldb2 ) ) = x0 + x2; c_im( ARR3D( b, i, 0, 0, ldb1, ldb2 ) ) = y0 + y2; c_re( ARR3D( b, i, 2, 0, ldb1, ldb2 ) ) = x0 - x2; c_im( ARR3D( b, i, 2, 0, ldb1, ldb2 ) ) = y0 - y2; c_re( ARR3D( b, i, 1, 0, ldb1, ldb2 ) ) = x1 + x3; c_im( ARR3D( b, i, 1, 0, ldb1, ldb2 ) ) = y1 + y3; c_re( ARR3D( b, i, 3, 0, ldb1, ldb2 ) ) = x1 - x3; c_im( ARR3D( b, i, 3, 0, ldb1, ldb2 ) ) = y1 - y3; } for (j = 1; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2 = wr1*wr1 - wi1*wi1; wi2 = wr1*wi1 + wr1*wi1; wr3 = wr1*wr2 - wi1*wi2; wi3 = wr1*wi2 + wi1*wr2; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, j, 0, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 2, lda1, lda2 ) ); y0 = c_im( ARR3D( a, i, j, 0, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 2, lda1, lda2 ) ); x1 = c_re( ARR3D( a, i, j, 0, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 2, lda1, lda2 ) ); y1 = c_im( ARR3D( a, i, j, 0, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 2, lda1, lda2 ) ); x2 = c_re( ARR3D( a, i, j, 1, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 3, lda1, lda2 ) ); y2 = c_im( ARR3D( a, i, j, 1, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 3, lda1, lda2 ) ); x3 = c_im( ARR3D( a, i, j, 1, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 3, lda1, lda2 ) ); y3 = c_re( ARR3D( a, i, j, 3, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 1, lda1, lda2 ) ); c_re( ARR3D( b, i, 0, j, ldb1, ldb2 ) ) = x0 + x2; c_im( ARR3D( b, i, 0, j, ldb1, ldb2 ) ) = y0 + y2; c_re( ARR3D( b, i, 2, j, ldb1, ldb2 ) ) = wr2 * (x0-x2) - wi2 * (y0-y2); c_im( ARR3D( b, i, 2, j, ldb1, ldb2 ) ) = wr2 * (y0-y2) + wi2 * (x0-x2); c_re( ARR3D( b, i, 1, j, ldb1, ldb2 ) ) = wr1 * (x1+x3) - wi1 * (y1+y3); c_im( ARR3D( b, i, 1, j, ldb1, ldb2 ) ) = wr1 * (y1+y3) + wi1 * (x1+x3); c_re( ARR3D( b, i, 3, j, ldb1, ldb2 ) ) = wr3 * (x1-x3) - wi3 * (y1-y3); c_im( ARR3D( b, i, 3, j, ldb1, ldb2 ) ) = wr3 * (y1-y3) + wi3 * (x1-x3); } } } static void 
fft8a(fftw_complex *a, fftw_complex *b, fftw_complex *w, int l) { int j, lda, ldb; double x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7; double wr1, wr2, wr3, wr4, wr5, wr6, wr7, wi1, wi2, wi3, wi4, wi5, wi6, wi7; double u0, u1, u2, u3, v0, v1, v2, v3; double c81 = 0.70710678118654752; lda = l; ldb = 8; for (j = 0; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2 = wr1*wr1 - wi1*wi1; wi2 = wr1*wi1 + wr1*wi1; wr3 = wr1*wr2 - wi1*wi2; wi3 = wr1*wi2 + wi1*wr2; wr4 = wr2*wr2 - wi2*wi2; wi4 = wr2*wi2 + wr2*wi2; wr5 = wr2*wr3 - wi2*wi3; wi5 = wr2*wi3 + wi2*wr3; wr6 = wr3*wr3 - wi3*wi3; wi6 = wr3*wi3 + wr3*wi3; wr7 = wr3*wr4 - wi3*wi4; wi7 = wr3*wi4 + wi3*wr4; x0 = c_re( ARR2D( a, j, 0, lda ) ) + c_re( ARR2D( a, j, 4, lda ) ); y0 = c_im( ARR2D( a, j, 0, lda ) ) + c_im( ARR2D( a, j, 4, lda ) ); x1 = c_re( ARR2D( a, j, 0, lda ) ) - c_re( ARR2D( a, j, 4, lda ) ); y1 = c_im( ARR2D( a, j, 0, lda ) ) - c_im( ARR2D( a, j, 4, lda ) ); x2 = c_re( ARR2D( a, j, 2, lda ) ) + c_re( ARR2D( a, j, 6, lda ) ); y2 = c_im( ARR2D( a, j, 2, lda ) ) + c_im( ARR2D( a, j, 6, lda ) ); x3 = c_im( ARR2D( a, j, 2, lda ) ) - c_im( ARR2D( a, j, 6, lda ) ); y3 = c_re( ARR2D( a, j, 6, lda ) ) - c_re( ARR2D( a, j, 2, lda ) ); u0 = x0 + x2; v0 = y0 + y2; u1 = x0 - x2; v1 = y0 - y2; x4 = c_re( ARR2D( a, j, 1, lda ) ) + c_re( ARR2D( a, j, 5, lda ) ); y4 = c_im( ARR2D( a, j, 1, lda ) ) + c_im( ARR2D( a, j, 5, lda ) ); x5 = c_re( ARR2D( a, j, 1, lda ) ) - c_re( ARR2D( a, j, 5, lda ) ); y5 = c_im( ARR2D( a, j, 1, lda ) ) - c_im( ARR2D( a, j, 5, lda ) ); x6 = c_re( ARR2D( a, j, 3, lda ) ) + c_re( ARR2D( a, j, 7, lda ) ); y6 = c_im( ARR2D( a, j, 3, lda ) ) + c_im( ARR2D( a, j, 7, lda ) ); x7 = c_re( ARR2D( a, j, 3, lda ) ) - c_re( ARR2D( a, j, 7, lda ) ); y7 = c_im( ARR2D( a, j, 3, lda ) ) - c_im( ARR2D( a, j, 7, lda ) ); u2 = x4 + x6; v2 = y4 + y6; u3 = y4 - y6; v3 = x6 - x4; c_re( ARR2D( b, 0, j, ldb ) ) = u0 + u2; c_im( ARR2D( b, 0, j, ldb ) ) = v0 + v2; c_re( ARR2D( b, 4, j, ldb ) ) 
= wr4 * (u0-u2) - wi4 * (v0-v2); c_im( ARR2D( b, 4, j, ldb ) ) = wr4 * (v0-v2) + wi4 * (u0-u2); c_re( ARR2D( b, 2, j, ldb ) ) = wr2 * (u1+u3) - wi2 * (v1+v3); c_im( ARR2D( b, 2, j, ldb ) ) = wr2 * (v1+v3) + wi2 * (u1+u3); c_re( ARR2D( b, 6, j, ldb ) ) = wr6 * (u1-u3) - wi6 * (v1-v3); c_im( ARR2D( b, 6, j, ldb ) ) = wr6 * (v1-v3) + wi6 * (u1-u3); u0 = x1 + c81 * (x5 - x7); v0 = y1 + c81 * (y5 - y7); u1 = x1 - c81 * (x5 - x7); v1 = y1 - c81 * (y5 - y7); u2 = x3 + c81 * (y5 + y7); v2 = y3 - c81 * (x5 + x7); u3 = x3 - c81 * (y5 + y7); v3 = y3 + c81 * (x5 + x7); c_re( ARR2D( b, 1, j, ldb ) ) = wr1 * (u0+u2) - wi1 * (v0+v2); c_im( ARR2D( b, 1, j, ldb ) ) = wr1 * (v0+v2) + wi1 * (u0+u2); c_re( ARR2D( b, 5, j, ldb ) ) = wr5 * (u1+u3) - wi5 * (v1+v3); c_im( ARR2D( b, 5, j, ldb ) ) = wr5 * (v1+v3) + wi5 * (u1+u3); c_re( ARR2D( b, 3, j, ldb ) ) = wr3 * (u1-u3) - wi3 * (v1-v3); c_im( ARR2D( b, 3, j, ldb ) ) = wr3 * (v1-v3) + wi3 * (u1-u3); c_re( ARR2D( b, 7, j, ldb ) ) = wr7 * (u0-u2) - wi7 * (v0-v2); c_im( ARR2D( b, 7, j, ldb ) ) = wr7 * (v0-v2) + wi7 * (u0-u2); } } static void fft8b(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { int i, j, lda1, lda2, ldb1, ldb2; double x0, x1, x2, x3, x4, x5, x6, x7, y0, y1, y2, y3, y4, y5, y6, y7; double wr1, wr2, wr3, wr4, wr5, wr6, wr7, wi1, wi2, wi3, wi4, wi5, wi6, wi7; double u0, u1, u2, u3, v0, v1, v2, v3; double c81 = 0.70710678118654752; lda1 = m; lda2 = l; ldb1 = m; ldb2 = 8; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, 0, 0, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 4, lda1, lda2 ) ); y0 = c_im( ARR3D( a, i, 0, 0, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 4, lda1, lda2 ) ); x1 = c_re( ARR3D( a, i, 0, 0, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 4, lda1, lda2 ) ); y1 = c_im( ARR3D( a, i, 0, 0, lda1, lda2 ) ) - c_im( ARR3D( a, i, 0, 4, lda1, lda2 ) ); x2 = c_re( ARR3D( a, i, 0, 2, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 6, lda1, lda2 ) ); y2 = c_im( ARR3D( a, i, 0, 2, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 6, lda1, 
lda2 ) ); x3 = c_im( ARR3D( a, i, 0, 2, lda1, lda2 ) ) - c_im( ARR3D( a, i, 0, 6, lda1, lda2 ) ); y3 = c_re( ARR3D( a, i, 0, 6, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 2, lda1, lda2 ) ); u0 = x0 + x2; v0 = y0 + y2; u1 = x0 - x2; v1 = y0 - y2; x4 = c_re( ARR3D( a, i, 0, 1, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 5, lda1, lda2 ) ); y4 = c_im( ARR3D( a, i, 0, 1, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 5, lda1, lda2 ) ); x5 = c_re( ARR3D( a, i, 0, 1, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 5, lda1, lda2 ) ); y5 = c_im( ARR3D( a, i, 0, 1, lda1, lda2 ) ) - c_im( ARR3D( a, i, 0, 5, lda1, lda2 ) ); x6 = c_re( ARR3D( a, i, 0, 3, lda1, lda2 ) ) + c_re( ARR3D( a, i, 0, 7, lda1, lda2 ) ); y6 = c_im( ARR3D( a, i, 0, 3, lda1, lda2 ) ) + c_im( ARR3D( a, i, 0, 7, lda1, lda2 ) ); x7 = c_re( ARR3D( a, i, 0, 3, lda1, lda2 ) ) - c_re( ARR3D( a, i, 0, 7, lda1, lda2 ) ); y7 = c_im( ARR3D( a, i, 0, 3, lda1, lda2 ) ) - c_im( ARR3D( a, i, 0, 7, lda1, lda2 ) ); u2 = x4 + x6; v2 = y4 + y6; u3 = y4 - y6; v3 = x6 - x4; c_re( ARR3D( b, i, 0, 0, ldb1, ldb2 ) ) = u0 + u2; c_im( ARR3D( b, i, 0, 0, ldb1, ldb2 ) ) = v0 + v2; c_re( ARR3D( b, i, 4, 0, ldb1, ldb2 ) ) = u0 - u2; c_im( ARR3D( b, i, 4, 0, ldb1, ldb2 ) ) = v0 - v2; c_re( ARR3D( b, i, 2, 0, ldb1, ldb2 ) ) = u1 + u3; c_im( ARR3D( b, i, 2, 0, ldb1, ldb2 ) ) = v1 + v3; c_re( ARR3D( b, i, 6, 0, ldb1, ldb2 ) ) = u1 - u3; c_im( ARR3D( b, i, 6, 0, ldb1, ldb2 ) ) = v1 - v3; u0 = x1 + c81 * (x5 - x7); v0 = y1 + c81 * (y5 - y7); u1 = x1 - c81 * (x5 - x7); v1 = y1 - c81 * (y5 - y7); u2 = x3 + c81 * (y5 + y7); v2 = y3 - c81 * (x5 + x7); u3 = x3 - c81 * (y5 + y7); v3 = y3 + c81 * (x5 + x7); c_re( ARR3D( b, i, 1, 0, ldb1, ldb2 ) ) = u0 + u2; c_im( ARR3D( b, i, 1, 0, ldb1, ldb2 ) ) = v0 + v2; c_re( ARR3D( b, i, 5, 0, ldb1, ldb2 ) ) = u1 + u3; c_im( ARR3D( b, i, 5, 0, ldb1, ldb2 ) ) = v1 + v3; c_re( ARR3D( b, i, 3, 0, ldb1, ldb2 ) ) = u1 - u3; c_im( ARR3D( b, i, 3, 0, ldb1, ldb2 ) ) = v1 - v3; c_re( ARR3D( b, i, 7, 0, ldb1, ldb2 ) ) = u0 - u2; c_im( ARR3D( b, 
i, 7, 0, ldb1, ldb2 ) ) = v0 - v2; } for (j = 1; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2 = wr1*wr1 - wi1*wi1; wi2 = wr1*wi1 + wr1*wi1; wr3 = wr1*wr2 - wi1*wi2; wi3 = wr1*wi2 + wi1*wr2; wr4 = wr2*wr2 - wi2*wi2; wi4 = wr2*wi2 + wr2*wi2; wr5 = wr2*wr3 - wi2*wi3; wi5 = wr2*wi3 + wi2*wr3; wr6 = wr3*wr3 - wi3*wi3; wi6 = wr3*wi3 + wr3*wi3; wr7 = wr3*wr4 - wi3*wi4; wi7 = wr3*wi4 + wi3*wr4; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, j, 0, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 4, lda1, lda2 ) ); y0 = c_im( ARR3D( a, i, j, 0, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 4, lda1, lda2 ) ); x1 = c_re( ARR3D( a, i, j, 0, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 4, lda1, lda2 ) ); y1 = c_im( ARR3D( a, i, j, 0, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 4, lda1, lda2 ) ); x2 = c_re( ARR3D( a, i, j, 2, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 6, lda1, lda2 ) ); y2 = c_im( ARR3D( a, i, j, 2, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 6, lda1, lda2 ) ); x3 = c_im( ARR3D( a, i, j, 2, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 6, lda1, lda2 ) ); y3 = c_re( ARR3D( a, i, j, 6, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 2, lda1, lda2 ) ); u0 = x0 + x2; v0 = y0 + y2; u1 = x0 - x2; v1 = y0 - y2; x4 = c_re( ARR3D( a, i, j, 1, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 5, lda1, lda2 ) ); y4 = c_im( ARR3D( a, i, j, 1, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 5, lda1, lda2 ) ); x5 = c_re( ARR3D( a, i, j, 1, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 5, lda1, lda2 ) ); y5 = c_im( ARR3D( a, i, j, 1, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 5, lda1, lda2 ) ); x6 = c_re( ARR3D( a, i, j, 3, lda1, lda2 ) ) + c_re( ARR3D( a, i, j, 7, lda1, lda2 ) ); y6 = c_im( ARR3D( a, i, j, 3, lda1, lda2 ) ) + c_im( ARR3D( a, i, j, 7, lda1, lda2 ) ); x7 = c_re( ARR3D( a, i, j, 3, lda1, lda2 ) ) - c_re( ARR3D( a, i, j, 7, lda1, lda2 ) ); y7 = c_im( ARR3D( a, i, j, 3, lda1, lda2 ) ) - c_im( ARR3D( a, i, j, 7, lda1, lda2 ) ); u2 = x4 + x6; v2 = y4 + y6; u3 = y4 - y6; v3 = x6 - x4; c_re( ARR3D( b, i, 0, j, ldb1, ldb2 ) ) = u0 + u2; 
c_im( ARR3D( b, i, 0, j, ldb1, ldb2 ) ) = v0 + v2; c_re( ARR3D( b, i, 4, j, ldb1, ldb2 ) ) = wr4 * (u0-u2) - wi4 * (v0-v2); c_im( ARR3D( b, i, 4, j, ldb1, ldb2 ) ) = wr4 * (v0-v2) + wi4 * (u0-u2); c_re( ARR3D( b, i, 2, j, ldb1, ldb2 ) ) = wr2 * (u1+u3) - wi2 * (v1+v3); c_im( ARR3D( b, i, 2, j, ldb1, ldb2 ) ) = wr2 * (v1+v3) + wi2 * (u1+u3); c_re( ARR3D( b, i, 6, j, ldb1, ldb2 ) ) = wr6 * (u1-u3) - wi6 * (v1-v3); c_im( ARR3D( b, i, 6, j, ldb1, ldb2 ) ) = wr6 * (v1-v3) + wi6 * (u1-u3); u0 = x1 + c81 * (x5 - x7); v0 = y1 + c81 * (y5 - y7); u1 = x1 - c81 * (x5 - x7); v1 = y1 - c81 * (y5 - y7); u2 = x3 + c81 * (y5 + y7); v2 = y3 - c81 * (x5 + x7); u3 = x3 - c81 * (y5 + y7); v3 = y3 + c81 * (x5 + x7); c_re( ARR3D( b, i, 1, j, ldb1, ldb2 ) ) = wr1 * (u0+u2) - wi1 * (v0+v2); c_im( ARR3D( b, i, 1, j, ldb1, ldb2 ) ) = wr1 * (v0+v2) + wi1 * (u0+u2); c_re( ARR3D( b, i, 5, j, ldb1, ldb2 ) ) = wr5 * (u1+u3) - wi5 * (v1+v3); c_im( ARR3D( b, i, 5, j, ldb1, ldb2 ) ) = wr5 * (v1+v3) + wi5 * (u1+u3); c_re( ARR3D( b, i, 3, j, ldb1, ldb2 ) ) = wr3 * (u1-u3) - wi3 * (v1-v3); c_im( ARR3D( b, i, 3, j, ldb1, ldb2 ) ) = wr3 * (v1-v3) + wi3 * (u1-u3); c_re( ARR3D( b, i, 7, j, ldb1, ldb2 ) ) = wr7 * (u0-u2) - wi7 * (v0-v2); c_im( ARR3D( b, i, 7, j, ldb1, ldb2 ) ) = wr7 * (v0-v2) + wi7 * (u0-u2); } } } static void fft3a(fftw_complex *a, fftw_complex *b, fftw_complex *w, int l) { int j; double x0, x1, x2; double y0, y1, y2; double wr1, wr2; double wi1, wi2; double c31 = 0.86602540378443865, c32 = 0.5; for (j = 0; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2=wr1*wr1-wi1*wi1; wi2=wr1*wi1+wr1*wi1; x0 = c_re( ARR2D( a, j, 1, l ) ) + c_re( ARR2D( a, j, 2, l ) ); y0 = c_im( ARR2D( a, j, 1, l ) ) + c_im( ARR2D( a, j, 2, l ) ); x1 = c_re( ARR2D( a, j, 0, l ) ) - c32 * x0; y1 = c_im( ARR2D( a, j, 0, l ) ) - c32 * y0; x2 = c31 * ( c_im( ARR2D( a, j, 1, l ) ) - c_im( ARR2D( a, j, 2, l ) )); y2 = c31 * ( c_re( ARR2D( a, j, 2, l ) ) - c_re( ARR2D( a, j, 1, l ) )); c_re( ARR2D( b, 0, j, 3 ) ) = 
c_re( ARR2D( a, j, 0, l ) ) + x0; c_im( ARR2D( b, 0, j, 3 ) ) = c_im( ARR2D( a, j, 0, l ) ) + y0; c_re( ARR2D( b, 1, j, 3 ) ) = wr1*(x1+x2)-wi1*(y1+y2); c_im( ARR2D( b, 1, j, 3 ) ) = wr1*(y1+y2)+wi1*(x1+x2); c_re( ARR2D( b, 2, j, 3 ) ) = wr2*(x1-x2)-wi2*(y1-y2); c_im( ARR2D( b, 2, j, 3 ) ) = wr2*(y1-y2)+wi2*(x1-x2); } } static void fft3b(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { int i, j; double x0, x1, x2; double y0, y1, y2; double wr1, wr2; double wi1, wi2; double c31 = 0.86602540378443865, c32 = 0.5; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, 0, 1, m, l ) ) + c_re( ARR3D( a, i, 0, 2, m, l ) ); y0 = c_im( ARR3D( a, i, 0, 1, m, l ) ) + c_im( ARR3D( a, i, 0, 2, m, l ) ); x1 = c_re( ARR3D( a, i, 0, 0, m, l ) ) - c32 * x0; y1 = c_im( ARR3D( a, i, 0, 0, m, l ) ) - c32 * y0; x2 = c31 * (c_im( ARR3D( a, i, 0, 1, m, l ) ) - c_im( ARR3D( a, i, 0, 2, m, l ) )); y2 = c31 * (c_re( ARR3D( a, i, 0, 2, m, l ) ) - c_re( ARR3D( a, i, 0, 1, m, l ) )); c_re( ARR3D( b, i, 0, 0, m, 3 ) ) = c_re( ARR3D( a, i, 0, 0, m, l ) ) + x0; c_im( ARR3D( b, i, 0, 0, m, 3 ) ) = c_im( ARR3D( a, i, 0, 0, m, l ) ) + y0; c_re( ARR3D( b, i, 1, 0, m, 3 ) ) = x1 + x2; c_im( ARR3D( b, i, 1, 0, m, 3 ) ) = y1 + y2; c_re( ARR3D( b, i, 2, 0, m, 3 ) ) = x1 - x2; c_im( ARR3D( b, i, 2, 0, m, 3 ) ) = y1 - y2; } for (j = 1; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2=wr1*wr1-wi1*wi1; wi2=wr1*wi1+wr1*wi1; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, j, 1, m, l ) ) + c_re( ARR3D( a, i, j, 2, m, l ) ); y0 = c_im( ARR3D( a, i, j, 1, m, l ) ) + c_im( ARR3D( a, i, j, 2, m, l ) ); x1 = c_re( ARR3D( a, i, j, 0, m, l ) ) - c32 * x0; y1 = c_im( ARR3D( a, i, j, 0, m, l ) ) - c32 * y0; x2 = c31 * (c_im( ARR3D( a, i, j, 1, m, l ) ) - c_im( ARR3D( a, i, j, 2, m, l ) )); y2 = c31 * (c_re( ARR3D( a, i, j, 2, m, l ) ) - c_re( ARR3D( a, i, j, 1, m, l ) )); c_re( ARR3D( b, i, 0, j, m, 3 ) ) = c_re( ARR3D( a, i, j, 0, m, l ) ) + x0; c_im( ARR3D( b, i, 0, j, m, 3 ) ) = c_im( ARR3D( a, 
i, j, 0, m, l ) ) + y0; c_re( ARR3D( b, i, 1, j, m, 3 ) ) = wr1*(x1+x2)-wi1*(y1+y2); c_im( ARR3D( b, i, 1, j, m, 3 ) ) = wr1*(y1+y2)+wi1*(x1+x2); c_re( ARR3D( b, i, 2, j, m, 3 ) ) = wr2*(x1-x2)-wi2*(y1-y2); c_im( ARR3D( b, i, 2, j, m, 3 ) ) = wr2*(y1-y2)+wi2*(x1-x2); } } } static void fft5a(fftw_complex *a, fftw_complex *b, fftw_complex *w, int l) { int j; double wr1, wr2, wr3, wr4; double wi1, wi2, wi3, wi4; double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10; double y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10; double c51 = 0.95105651629515357, c52 = 0.61803398874989485; double c53 = 0.55901699437494742, c54 = 0.25; for (j = 0; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2=wr1*wr1-wi1*wi1; wi2=wr1*wi1+wr1*wi1; wr3=wr1*wr2-wi1*wi2; wi3=wr1*wi2+wi1*wr2; wr4=wr2*wr2-wi2*wi2; wi4=wr2*wi2+wr2*wi2; x0 = c_re( ARR2D( a, j, 1, l ) ) + c_re( ARR2D( a, j, 4, l ) ); y0 = c_im( ARR2D( a, j, 1, l ) ) + c_im( ARR2D( a, j, 4, l ) ); x1 = c_re( ARR2D( a, j, 2, l ) ) + c_re( ARR2D( a, j, 3, l ) ); y1 = c_im( ARR2D( a, j, 2, l ) ) + c_im( ARR2D( a, j, 3, l ) ); x2 = c51 * (c_re( ARR2D( a, j, 1, l ) ) - c_re( ARR2D( a, j, 4, l ) )); y2 = c51 * (c_im( ARR2D( a, j, 1, l ) ) - c_im( ARR2D( a, j, 4, l ) )); x3 = c51 * (c_re( ARR2D( a, j, 2, l ) ) - c_re( ARR2D( a, j, 3, l ) )); y3 = c51 * (c_im( ARR2D( a, j, 2, l ) ) - c_im( ARR2D( a, j, 3, l ) )); x4 = x0 + x1; y4 = y0 + y1; x5 = c53 * (x0-x1); y5 = c53 * (y0-y1); x6 = c_re( ARR2D( a, j, 0, l ) ) - c54 * x4; y6 = c_im( ARR2D( a, j, 0, l ) ) - c54 * y4; x7 = x6 + x5; y7 = y6 + y5; x8 = x6 - x5; y8 = y6 - y5; x9 = y2 + c52*y3; y9 = -x2 - c52*x3; x10 = c52*y2 - y3; y10 = x3 - c52*x2; c_re( ARR2D( b, 0, j, 5 ) ) = c_re( ARR2D( a, j, 0, l ) ) + x4; c_im( ARR2D( b, 0, j, 5 ) ) = c_im( ARR2D( a, j, 0, l ) ) + y4; c_re( ARR2D( b, 1, j, 5 ) ) = wr1 * (x7+x9) - wi1 * (y7+y9); c_im( ARR2D( b, 1, j, 5 ) ) = wr1 * (y7+y9) + wi1 * (x7+x9); c_re( ARR2D( b, 2, j, 5 ) ) = wr2 * (x8+x10) - wi2 * (y8+y10); c_im( ARR2D( b, 2, j, 5 ) ) = wr2 * 
(y8+y10) + wi2 * (x8+x10); c_re( ARR2D( b, 3, j, 5 ) ) = wr3 * (x8-x10) - wi3 * (y8-y10); c_im( ARR2D( b, 3, j, 5 ) ) = wr3 * (y8-y10) + wi3 * (x8-x10); c_re( ARR2D( b, 4, j, 5 ) ) = wr4 * (x7-x9) - wi4 * (y7-y9); c_im( ARR2D( b, 4, j, 5 ) ) = wr4 * (y7-y9) + wi4 * (x7-x9); } } static void fft5b(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { int i, j; double x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10; double y0, y1, y2, y3, y4, y5, y6, y7, y8, y9, y10; double wr1, wr2, wr3, wr4; double wi1, wi2, wi3, wi4; double c51 = 0.95105651629515357, c52 = 0.61803398874989485; double c53 = 0.55901699437494742, c54 = 0.25; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, 0, 1, m, l ) ) + c_re( ARR3D( a, i, 0, 4, m, l ) ); y0 = c_im( ARR3D( a, i, 0, 1, m, l ) ) + c_im( ARR3D( a, i, 0, 4, m, l ) ); x1 = c_re( ARR3D( a, i, 0, 2, m, l ) ) + c_re( ARR3D( a, i, 0, 3, m, l ) ); y1 = c_im( ARR3D( a, i, 0, 2, m, l ) ) + c_im( ARR3D( a, i, 0, 3, m, l ) ); x2 = c51 * (c_re( ARR3D( a, i, 0, 1, m, l ) ) - c_re( ARR3D( a, i, 0, 4, m, l ) )); y2 = c51 * (c_im( ARR3D( a, i, 0, 1, m, l ) ) - c_im( ARR3D( a, i, 0, 4, m, l ) )); x3 = c51 * (c_re( ARR3D( a, i, 0, 2, m, l ) ) - c_re( ARR3D( a, i, 0, 3, m, l ) )); y3 = c51 * (c_im( ARR3D( a, i, 0, 2, m, l ) ) - c_im( ARR3D( a, i, 0, 3, m, l ) )); x4 = x0 + x1; y4 = y0 + y1; x5 = c53 * (x0-x1); y5 = c53 * (y0-y1); x6 = c_re( ARR3D( a, i, 0, 0, m, l ) ) - c54 * x4; y6 = c_im( ARR3D( a, i, 0, 0, m, l ) ) - c54 * y4; x7 = x6 + x5; y7 = y6 + y5; x8 = x6 - x5; y8 = y6 - y5; x9 = y2 + c52 * y3; y9 = -x2 - c52 * x3; x10 = c52 * y2 - y3; y10 = x3 - c52 * x2; c_re( ARR3D( b, i, 0, 0, m, 5 ) ) = c_re( ARR3D( a, i, 0, 0, m, l ) ) + x4; c_im( ARR3D( b, i, 0, 0, m, 5 ) ) = c_im( ARR3D( a, i, 0, 0, m, l ) ) + y4; c_re( ARR3D( b, i, 1, 0, m, 5 ) ) = x7 + x9; c_im( ARR3D( b, i, 1, 0, m, 5 ) ) = y7 + y9; c_re( ARR3D( b, i, 2, 0, m, 5 ) ) = x8 + x10; c_im( ARR3D( b, i, 2, 0, m, 5 ) ) = y8 + y10; c_re( ARR3D( b, i, 3, 0, m, 5 ) ) = x8 - x10; c_im( 
ARR3D( b, i, 3, 0, m, 5 ) ) = y8 - y10; c_re( ARR3D( b, i, 4, 0, m, 5 ) ) = x7 - x9; c_im( ARR3D( b, i, 4, 0, m, 5 ) ) = y7 - y9; } for (j = 1; j < l; ++j) { wr1 = c_re( w[j] ); wi1 = c_im( w[j] ); wr2 = wr1 * wr1 - wi1*wi1; wi2 = wr1 * wi1 + wr1*wi1; wr3 = wr1 * wr2 - wi1*wi2; wi3 = wr1 * wi2 + wi1*wr2; wr4 = wr2 * wr2 - wi2*wi2; wi4 = wr2 * wi2 + wr2*wi2; for (i = 0; i < m; ++i) { x0 = c_re( ARR3D( a, i, j, 1, m, l ) ) + c_re( ARR3D( a, i, j, 4, m, l ) ); y0 = c_im( ARR3D( a, i, j, 1, m, l ) ) + c_im( ARR3D( a, i, j, 4, m, l ) ); x1 = c_re( ARR3D( a, i, j, 2, m, l ) ) + c_re( ARR3D( a, i, j, 3, m, l ) ); y1 = c_im( ARR3D( a, i, j, 2, m, l ) ) + c_im( ARR3D( a, i, j, 3, m, l ) ); x2 = c51 * (c_re( ARR3D( a, i, j, 1, m, l ) ) - c_re( ARR3D( a, i, j, 4, m, l ) )); y2 = c51 * (c_im( ARR3D( a, i, j, 1, m, l ) ) - c_im( ARR3D( a, i, j, 4, m, l ) )); x3 = c51 * (c_re( ARR3D( a, i, j, 2, m, l ) ) - c_re( ARR3D( a, i, j, 3, m, l ) )); y3 = c51 * (c_im( ARR3D( a, i, j, 2, m, l ) ) - c_im( ARR3D( a, i, j, 3, m, l ) )); x4 = x0 + x1; y4 = y0 + y1; x5 = c53 * (x0-x1); y5 = c53 * (y0-y1); x6 = c_re( ARR3D( a, i, j, 0, m, l ) ) - c54*x4; y6 = c_im( ARR3D( a, i, j, 0, m, l ) ) - c54*y4; x7 = x6 + x5; y7 = y6 + y5; x8 = x6 - x5; y8 = y6 - y5; x9 = y2 + c52 * y3; y9 = -x2 - c52 * x3; x10 = c52*y2 - y3; y10 = x3 - c52*x2; c_re( ARR3D( b, i, 0, j, m, 5 ) ) = c_re( ARR3D( a, i, j, 0, m, l ) ) + x4; c_im( ARR3D( b, i, 0, j, m, 5 ) ) = c_im( ARR3D( a, i, j, 0, m, l ) ) + y4; c_re( ARR3D( b, i, 1, j, m, 5 ) ) = wr1*(x7+x9) - wi1*(y7+y9); c_im( ARR3D( b, i, 1, j, m, 5 ) ) = wr1*(y7+y9) + wi1*(x7+x9); c_re( ARR3D( b, i, 2, j, m, 5 ) ) = wr2*(x8+x10) - wi2*(y8+y10); c_im( ARR3D( b, i, 2, j, m, 5 ) ) = wr2*(y8+y10) + wi2*(x8+x10); c_re( ARR3D( b, i, 3, j, m, 5 ) ) = wr3*(x8-x10) - wi3*(y8-y10); c_im( ARR3D( b, i, 3, j, m, 5 ) ) = wr3*(y8-y10) + wi3*(x8-x10); c_re( ARR3D( b, i, 4, j, m, 5 ) ) = wr4*(x7-x9) - wi4*(y7-y9); c_im( ARR3D( b, i, 4, j, m, 5 ) ) = wr4*(y7-y9) + wi4*(x7-x9); } } } 
static void fft3(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { if (1 == m) fft3a( a, b, w, l ); else fft3b( a, b, w, m, l ); } static void fft4(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { if (1 == m) fft4a( a, b, w, l ); else fft4b( a, b, w, m, l ); } static void fft5(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { if (1 == m) fft5a( a, b, w, l ); else fft5b( a, b, w, m, l ); } static void fft8(fftw_complex *a, fftw_complex *b, fftw_complex *w, int m, int l) { if (1 == m) fft8a( a, b, w, l ); else fft8b( a, b, w, m, l ); } int HPCC_fft235(fftw_complex *a, fftw_complex *b, fftw_complex *w, int n, const int *ip) { int j, k, l, m, key, kp4, kp8; if (ip[0] != 1) { kp4 = 2 - (ip[0] + 2) % 3; kp8 = (ip[0]-kp4) / 3; } else { kp4 = 0; kp8 = 0; } key = 1; j = 0; l = n; m = 1; for (k = 0; k < kp8; ++k) { l >>= 3; /* divide by 8 */ if (l >= 2) { if (key > 0) fft8( a, b, w + j, m, l ); else fft8( b, a, w + j, m, l ); key = -key; } else { if (key > 0) fft8( a, a, w + j, m, l ); else fft8( b, a, w + j, m, l ); } m <<= 3; /* multiply by 8 */ j += l; } for (k = 0; k < ip[2]; ++k) { l /= 5; if (l >= 2) { if (key > 0) fft5( a, b, w+j, m, l ); else fft5( b, a, w+j, m, l ); key = -key; } else { if (key > 0) fft5( a, a, w+j, m, l ); else fft5( b, a, w+j, m, l ); } m *= 5; j += l; } for (k = 0; k < kp4; ++k) { l >>= 2; /* divide by 4 */ if (l >= 2) { if (key > 0) fft4( a, b, w + j, m, l ); else fft4( b, a, w + j, m, l ); key = -key; } else { if (key > 0) fft4( a, a, w + j, m, l ); else fft4( b, a, w + j, m, l ); } m <<= 2; /* multiply by 4 */ j += l; } for (k = 0; k < ip[1]; ++k) { l /= 3; if (l >= 2) { if (key > 0) fft3( a, b, w+j, m, l ); else fft3( b, a, w+j, m, l ); key = -key; } else { if (key > 0) fft3( a, a, w+j, m, l ); else fft3( b, a, w+j, m, l ); } m *= 3; j += l; } if (ip[0] == 1) { if (key > 0) fft2( a, a, m ); else fft2( b, a, m ); } return 0; } static int settbl0(fftw_complex *w, int m, int l) { int i; double 
pi2, px; pi2 = 8.0 * atan(1.0); px = -pi2 / m / l; for (i = 0; i < l; ++i) { c_re(w[i]) = cos(px * i); c_im(w[i]) = sin(px * i); } return 0; } int HPCC_settbl(fftw_complex *w, int n) { int j, k, l, kp4, kp8; int ip[3]; HPCC_factor235( n, ip ); if (1 != ip[0]) { kp4 = 2 - (ip[0] + 2) % 3; kp8 = (ip[0]-kp4) / 3; } else { kp4 = 0; kp8 = 0; } j = 0; l = n; for (k = 0; k < kp8; ++k) { l >>= 3; /* divide by 8 */ settbl0( w + j, 8, l ); j += l; } for (k = 0; k < ip[2]; ++k) { l /= 5; settbl0( w + j, 5, l ); j += l; } for (k = 0; k < kp4; ++k) { l >>= 2; /* divide by 4 */ settbl0( w + j, 4, l ); j += l; } for (k = 0; k < ip[1]; ++k) { l /= 3; settbl0( w + j, 3, l ); j += l; } return 0; } /* settbl */ int HPCC_factor235(int n, int *ip) { ip[0] = ip[1] = ip[2] = 0; if (n % 2 != 0 && n % 3 != 0 && n % 5 != 0) return 1; if (n <= 1) return 1; /* count all 2 factors */ for (; n > 1 && ! (n & 1); n >>= 1) ip[0]++; /* count all 3 factors */ for (; n > 1 && ! (n % 3); n /= 3) ip[1]++; /* count all 5 factors */ for (; n > 1 && ! (n % 5); n /= 5) ip[2]++; if (n != 1) return 1; return 0; } int HPCC_factor235_8(s64Int_t n, int *ip) { ip[0] = ip[1] = ip[2] = 0; if (n % 2 != 0 && n % 3 != 0 && n % 5 != 0) return 1; if (n <= 1) return 1; /* count all 2 factors */ for (; n > 1 && ! (n & 1); n >>= 1) ip[0]++; /* count all 3 factors */ for (; n > 1 && ! (n % 3); n /= 3) ip[1]++; /* count all 5 factors */ for (; n > 1 && ! (n % 5); n /= 5) ip[2]++; if (n != 1) return 1; return 0; } hpcc-1.4.1/FFT/hpccfft.h0000644000000000000000000000346011256503657011543 00000000000000 #include #define FFTE_NDA2 65536 #define FFTE_NDA3 4096 #define FFTE_NDA4 256 /* Parameters that affect performance */ /* Blocking parameter. Suggested values: 8 for Pentium III and Athlon 16 for Pentium4, Athlon XP, Opteron, Itanium and Itanium2 */ #ifndef FFTE_NBLK #define FFTE_NBLK 16 #endif /* Padding parameter to avoid cache conflicts. 
Suggested values: 2 for Pentium III 4 for Athlon, Athlon XP, Opteron, Itanium 8 for Pentium4 and Itanium2 */
#ifndef FFTE_NP
#define FFTE_NP 8
#endif
/* Size of Level 2 cache */
#ifndef FFTE_L2SIZE
#define FFTE_L2SIZE 1048576
#endif
/* 64-bit integer types: plain `long' when LONG_IS_64BITS is defined,
   `long long' otherwise. */
#ifdef LONG_IS_64BITS
typedef unsigned long u64Int_t;
typedef long s64Int_t;
#else
typedef unsigned long long u64Int_t;
typedef long long s64Int_t;
#endif
#include "wrapfftw.h"
/* Routines shared by the FFT sources. */
extern int HPCC_ipow(int x, int p);
extern int HPCC_zfft1d(int n, fftw_complex *a, fftw_complex *b, int iopt, hpcc_fftw_plan p);
/* Mixed radix 8/5/4/3(/2) passes over a length-n vector; ip[] holds the
   counts of factors 2, 3 and 5. */
extern int HPCC_fft235(fftw_complex *a, fftw_complex *b, fftw_complex *w, int n, const int *ip);
/* Fills `w' with the twiddle tables HPCC_fft235() reads for length n. */
extern int HPCC_settbl(fftw_complex *w, int n);
/* Count factors 2, 3, 5 of n into ip[0..2]; return 0 when n > 1 has no other
   prime factor, 1 otherwise. */
extern int HPCC_factor235(int n, int *ip);
extern int HPCC_factor235_8(s64Int_t n, int *ip);
extern int HPCC_bcnrand(u64Int_t n, u64Int_t a, void *x);
/* Multi-dimensional indexing with the first index varying fastest.  ARRnD
   yields the element, PTRnD its address; `lda*' are the leading dimensions.
   NOTE: the array argument `a' is not parenthesised in the expansions, so
   pass a plain identifier or an already parenthesised expression. */
#define ARR2D(a, i, j, lda) a[(i)+(j)*(lda)]
#define PTR2D(a, i, j, lda) (a+(i)+(j)*(lda))
#define ARR3D(a, i, j, k, lda1, lda2) a[(i)+(lda1)*((j)+(k)*(lda2))]
#define PTR3D(a, i, j, k, lda1, lda2) (a+(i)+(lda1)*((j)+(k)*(lda2)))
#define ARR4D(a, i, j, k, l, lda1, lda2, lda3) a[(i)+(lda1)*((j)+(lda2)*((k)+(lda3)*(l)))]
/* v = v1 * v2 (complex product).  Expands to TWO statements and reads v1/v2
   after writing c_re(v): `v' must not alias `v1' or `v2', and the macro must
   not be the unbraced body of an if/for/while. */
#define c_mul3v(v, v1, v2) c_re(v) = c_re(v1) * c_re(v2) - c_im(v1) * c_im(v2); c_im(v) = c_re(v1) * c_im(v2) + c_im(v1) * c_re(v2)
/* d = s (complex copy).  Expands to two statements; same bracing caveat. */
#define c_assgn(d, s) c_re(d)=c_re(s);c_im(d)=c_im(s)
/* r = min(e, v): assigns e to r, then applies V2MIN.  Two statements; the
   arguments are not parenthesised in the expansion. */
#define V3MIN(r, e, v) r = e; V2MIN(r, v)
/* r = min(r, v); `v' and `r' are each evaluated more than once. */
#define V2MIN(r, v) r = v < r ?
v : r #define EMAX(d, v, e) d=e; d=d>v?d:v hpcc-1.4.1/FFT/mpifft.c0000644000000000000000000001513311403763471011402 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* mpifft.c */ #include #include "hpccfft.h" #include "wrapmpifftw.h" double *HPCC_fft_timings_forward, *HPCC_fft_timings_backward; static void MPIFFT0(HPCC_Params *params, int doIO, FILE *outFile, MPI_Comm comm, int locN, double *UGflops, s64Int_t *Un, double *UmaxErr, int *Ufailure) { int commRank, commSize, failure, flags; s64Int_t i, n; s64Int_t locn, loc0, alocn, aloc0, tls; double maxErr, tmp1, tmp2, tmp3, t0, t1, t2, t3, Gflops; double deps; fftw_complex *inout, *work; fftw_mpi_plan p; hpcc_fftw_mpi_plan ip; int sAbort, rAbort; #ifdef USING_FFTW int ilocn, iloc0, ialocn, ialoc0, itls; #endif failure = 1; Gflops = -1.0; deps = HPL_dlamch( HPL_MACH_EPS ); maxErr = 1.0 / deps; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &commRank ); n = locN; /* number of processes have been factored out - need to put it back in */ n *= commSize; n *= commSize; /* global vector size */ #ifdef USING_FFTW /* FFTW ver. 2 only supports vector sizes that fit in 'int' */ if (n > (1<<30)-1+(1<<30)) { #ifdef HPCC_FFTW_CHECK32 goto no_plan; #else if (doIO) { fprintf( outFile, "Warning: problem size too large: %ld*%d*%d\n", (long)(n / commSize / commSize), commSize, commSize ); } #endif } #endif #ifdef HPCC_FFTW_ESTIMATE flags = FFTW_ESTIMATE; #else flags = FFTW_MEASURE; #endif t1 = -MPI_Wtime(); p = fftw_mpi_create_plan( comm, n, FFTW_FORWARD, flags ); t1 += MPI_Wtime(); if (! 
p) goto no_plan; #ifdef USING_FFTW fftw_mpi_local_sizes( p, &ilocn, &iloc0, &ialocn, &ialoc0, &itls ); locn = ilocn; loc0 = iloc0; alocn = ialocn; aloc0 = ialoc0; tls = itls; #else fftw_mpi_local_sizes( p, &locn, &loc0, &alocn, &aloc0, &tls ); #endif inout = (fftw_complex *)HPCC_fftw_malloc( tls * (sizeof *inout) ); work = (fftw_complex *)HPCC_fftw_malloc( tls * (sizeof *work) ); sAbort = 0; if (! inout || ! work) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, comm ); if (rAbort > 0) { fftw_mpi_destroy_plan( p ); goto comp_end; } /* Make sure that `inout' and `work' are initialized in parallel if using Open MP: this will ensure better placement of pages if first-touch policy is used by a distrubuted shared memory machine. */ #ifdef _OPENMP #pragma omp parallel for for (i = 0; i < tls; ++i) { c_re( inout[i] ) = c_re( work[i] ) = 0.0; c_re( inout[i] ) = c_im( work[i] ) = 0.0; } #endif t0 = -MPI_Wtime(); HPCC_bcnrand( 2 * tls, 53 * commRank * 2 * tls, inout ); t0 += MPI_Wtime(); t2 = -MPI_Wtime(); fftw_mpi( p, 1, inout, work ); t2 += MPI_Wtime(); fftw_mpi_destroy_plan( p ); ip = HPCC_fftw_mpi_create_plan( comm, n, FFTW_BACKWARD, FFTW_ESTIMATE ); if (ip) { t3 = -MPI_Wtime(); HPCC_fftw_mpi( ip, 1, inout, work ); t3 += MPI_Wtime(); HPCC_fftw_mpi_destroy_plan( ip ); } HPCC_bcnrand( 2 * tls, 53 * commRank * 2 * tls, work ); /* regenerate data */ maxErr = 0.0; for (i = 0; i < locn; ++i) { tmp1 = c_re( inout[i] ) - c_re( work[i] ); tmp2 = c_im( inout[i] ) - c_im( work[i] ); tmp3 = sqrt( tmp1*tmp1 + tmp2*tmp2 ); maxErr = maxErr >= tmp3 ? 
maxErr : tmp3; } MPI_Allreduce( &maxErr, UmaxErr, 1, MPI_DOUBLE, MPI_MAX, comm ); maxErr = *UmaxErr; if (maxErr / log(n) / deps < params->test.thrsh) failure = 0; if (t2 > 0.0) Gflops = 1e-9 * (5.0 * n * log(n) / log(2.0)) / t2; if (doIO) { fprintf( outFile, "Number of nodes: %d\n", commSize ); fprintf( outFile, "Vector size: %20.0f\n", tmp1 = (double)n ); fprintf( outFile, "Generation time: %9.3f\n", t0 ); fprintf( outFile, "Tuning: %9.3f\n", t1 ); fprintf( outFile, "Computing: %9.3f\n", t2 ); fprintf( outFile, "Inverse FFT: %9.3f\n", t3 ); fprintf( outFile, "max(|x-x0|): %9.3e\n", maxErr ); fprintf( outFile, "Gflop/s: %9.3f\n", Gflops ); } comp_end: if (work) HPCC_fftw_free( work ); if (inout) HPCC_fftw_free( inout ); no_plan: *UGflops = Gflops; *Un = n; *UmaxErr = maxErr; *Ufailure = failure; } int HPCC_MPIFFT(HPCC_Params *params) { int commRank, commSize; int locN, procCnt, isComputing, doIO, failure = 0; s64Int_t n; double Gflops = -1.0, maxErr = -1.0; MPI_Comm comm; FILE *outFile; MPI_Comm_size( MPI_COMM_WORLD, &commSize ); MPI_Comm_rank( MPI_COMM_WORLD, &commRank ); doIO = commRank == 0 ? 1 : 0; if (doIO) { outFile = fopen( params->outFname, "a" ); if (! outFile) outFile = stderr; } /* There are two vectors of size 'n'/'commSize': inout, work, and internal work: 2*'n'/'commSize'; it's 4 vectors then. FFTE requires that the global vector size 'n' has to be at least as big as square of number of processes. The square is calculated in each factor independently. In other words, 'n' has to have at least twice as many 2 factors as the process count, twice as many 3 factors and twice as many 5 factors. 
*/ #ifdef HPCC_FFT_235 locN = 0; procCnt = commSize + 1; do { int f[3]; procCnt--; for ( ; procCnt > 1 && HPCC_factor235( procCnt, f ); procCnt--) ; /* EMPTY */ /* Make sure the local vector size is greater than 0 */ locN = HPCC_LocalVectorSize( params, 4*procCnt, sizeof(fftw_complex), 0 ); for ( ; locN >= 1 && HPCC_factor235( locN, f ); locN--) ; /* EMPTY */ } while (locN < 1); #else /* Find power of two that is smaller or equal to number of processes */ for (procCnt = 1; procCnt <= (commSize >> 1); procCnt <<= 1) ; /* EMPTY */ /* Make sure the local vector size is greater than 0 */ while (1) { locN = HPCC_LocalVectorSize( params, 4*procCnt, sizeof(fftw_complex), 1 ); if (locN) break; procCnt >>= 1; } #endif isComputing = commRank < procCnt ? 1 : 0; HPCC_fft_timings_forward = params->MPIFFTtimingsForward; HPCC_fft_timings_backward = params->MPIFFTtimingsBackward; if (commSize == procCnt) comm = MPI_COMM_WORLD; else MPI_Comm_split( MPI_COMM_WORLD, isComputing ? 0 : MPI_UNDEFINED, commRank, &comm ); if (isComputing) MPIFFT0( params, doIO, outFile, comm, locN, &Gflops, &n, &maxErr, &failure ); if (commSize != procCnt && isComputing && comm != MPI_COMM_NULL) MPI_Comm_free( &comm ); params->MPIFFT_N = n; params->MPIFFT_Procs = procCnt; params->MPIFFT_maxErr = maxErr; MPI_Bcast( &Gflops, 1, MPI_DOUBLE, 0, MPI_COMM_WORLD ); params->MPIFFTGflops = Gflops; params->FFTEnblk = FFTE_NBLK; params->FFTEnp = FFTE_NP; params->FFTEl2size = FFTE_L2SIZE; if (failure) params->Failure = 1; if (doIO) if (outFile != stderr) fclose( outFile ); return 0; } hpcc-1.4.1/FFT/onecpu.c0000644000000000000000000000547011256503657011415 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ #include int HPCC_StarFFT(HPCC_Params *params) { int commRank, commSize; int rv, errCount, failure = 0, failureAll = 0; double localGflops, minGflops, maxGflops, avgGflops; int n; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; localGflops = 
minGflops = maxGflops = avgGflops = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &commRank ); rv = HPCC_TestFFT( params, 0 == commRank, &localGflops, &n, &failure ); params->FFT_N = n; MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); if (failureAll) params->Failure = 1; MPI_Reduce( &localGflops, &minGflops, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &localGflops, &avgGflops, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &localGflops, &maxGflops, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); avgGflops /= commSize; MPI_Bcast( &avgGflops, 1, MPI_DOUBLE, 0, comm ); params->StarFFTGflops = avgGflops; BEGIN_IO( commRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Minimum Gflop/s %.6f\n", minGflops ); fprintf( outputFile, "Average Gflop/s %.6f\n", avgGflops ); fprintf( outputFile, "Maximum Gflop/s %.6f\n", maxGflops ); END_IO( commRank, outputFile ); return 0; } int HPCC_SingleFFT(HPCC_Params *params) { int commRank, commSize; int rv, errCount, rank, failure = 0; int n; double localGflops; double scl = 1.0 / RAND_MAX; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; localGflops = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &commRank ); srand(time(NULL)); scl *= commSize; /* select a node at random, but not node 0 (unless there is just one node) */ if (1 == commSize) rank = 0; else for (rank = 0; ; rank = (int)(scl * rand())) { if (rank > 0 && rank < commSize) break; } MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ if (commRank == rank) /* if this node has been selected */ rv = HPCC_TestFFT( params, 0 == commRank, &localGflops, &n, &failure ); MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ errCount = rv; MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ if (failure) params->Failure = 1; /* broadcast results */ 
MPI_Bcast( &localGflops, 1, MPI_DOUBLE, rank, comm ); params->SingleFFTGflops = localGflops; BEGIN_IO( commRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Node selected %d\n", rank ); fprintf( outputFile, "Single FFT Gflop/s %.6f\n", localGflops ); END_IO( commRank, outputFile ); return 0; } hpcc-1.4.1/FFT/pzfft1d.c0000644000000000000000000002442011256503657011476 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* C C FFTE: A FAST FOURIER TRANSFORM PACKAGE C C (C) COPYRIGHT SOFTWARE, 2000-2004, ALL RIGHTS RESERVED C BY C DAISUKE TAKAHASHI C GRADUATE SCHOOL OF SYSTEMS AND INFORMATION ENGINEERING C UNIVERSITY OF TSUKUBA C 1-1-1 TENNODAI, TSUKUBA, IBARAKI 305-8573, JAPAN C E-MAIL: daisuke@cs.tsukuba.ac.jp C C C PARALLEL 1-D COMPLEX FFT ROUTINE C C FORTRAN77 + MPI SOURCE PROGRAM C C CALL PZFFT1D(A,B,W,N,ICOMM,ME,NPU,IOPT) C C W((N/NPU)*3/2) IS SINE/COSINE TABLE (COMPLEX*16) C N IS THE LENGTH OF THE TRANSFORMS (INTEGER*8) C ----------------------------------- C N = (2**IP) * (3**IQ) * (5**IR) C ----------------------------------- C ICOMM IS THE COMMUNICATOR (INTEGER*4) C ME IS THE RANK (INTEGER*4) C NPU IS THE NUMBER OF PROCESSORS (INTEGER*4) C IOPT = 0 FOR INITIALIZING THE COEFFICIENTS (INTEGER*4) C IOPT = -1 FOR FORWARD TRANSFORM WHERE C A(N/NPU) IS COMPLEX INPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE A(BLOCK) C B(N/NPU) IS COMPLEX OUTPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE B(BLOCK) C IOPT = +1 FOR INVERSE TRANSFORM WHERE C A(N/NPU) IS COMPLEX INPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE A(BLOCK) C B(N/NPU) IS COMPLEX OUTPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE B(BLOCK) C IOPT = -2 FOR FORWARD TRANSFORM WHERE C A(N/NPU) IS COMPLEX INPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE A(BLOCK) C B(N/NPU) IS COMPLEX OUTPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE B(CYCLIC) C IOPT = +2 FOR INVERSE TRANSFORM WHERE C A(N/NPU) IS COMPLEX INPUT 
VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE A(CYCLIC) C B(N/NPU) IS COMPLEX OUTPUT VECTOR (COMPLEX*16) C!HPF$ DISTRIBUTE B(BLOCK) C C WRITTEN BY DAISUKE TAKAHASHI C */ #include "hpccfft.h" #include "wrapmpifftw.h" static void ztrans(fftw_complex *a, fftw_complex *b, int n1, int n2) { int ii, jj, i, j; int tmin1, tmin2; int lda, ldb; lda = n1; ldb = n2; #ifdef _OPENMP #pragma omp parallel for private(i,j,jj,tmin1,tmin2) #endif for (ii = 0; ii < n1; ii += FFTE_NBLK) for (jj = 0; jj < n2; jj += FFTE_NBLK) { V3MIN( tmin1, ii + FFTE_NBLK, n1 ); for (i = ii; i < tmin1; ++i) { V3MIN( tmin2, jj + FFTE_NBLK, n2 ); for (j = jj; j < tmin2; ++j) { c_assgn( ARR2D( b, j, i, ldb ), ARR2D( a, i, j, lda ) ); } } } } /* ztrans */ static void pztrans(fftw_complex *a, fftw_complex *b, s64Int_t nn, hpcc_fftw_mpi_plan p, int npu) { int i, nn2; nn2 = nn / npu; if (1 == npu) for (i = 0; i < nn2; ++i) { c_assgn( b[i], a[i] ); } else MPI_Alltoall( a, nn2, p->cmplx, b, nn2, p->cmplx, p->comm ); } /* pztrans */ static void pzfft1d0(fftw_complex *a, fftw_complex *a2, fftw_complex *apxyz, fftw_complex *axyzp, fftw_complex *b, fftw_complex *bxyzp, fftw_complex *bzyx, fftw_complex *cy, fftw_complex *cz, fftw_complex *d, fftw_complex *wx, fftw_complex *wy, fftw_complex *wz, fftw_complex *ww, fftw_complex *www, int nx, int ny, int nz, hpcc_fftw_mpi_plan p, int npu, const int *lnx, const int *lny, const int *lnz) { int i, j, k, l, ii, jj, kk; int tmin1, tmin2, tmin3; int nnx, nnz; s64Int_t nn; int ldcz, lda2_1, lda2_2, ldaxyzp1, ldaxyzp2, ldaxyzp3, ldbxyzp1, ldbxyzp2, ldbxyzp3, ldww, ldcy; int ldwww1, ldwww2, ldwww3, ldapxyz1, ldapxyz2, ldapxyz3, ldbzyx1, ldbzyx2, lda1, lda2; fftw_complex ztmp1, ztmp2, ztmp3; ldcz = nz + FFTE_NP; lda2_1 = nx / npu; lda2_2 = ny; ldaxyzp1 = nx / npu; ldaxyzp2 = ny; ldaxyzp3 = nz / npu; ldbxyzp1 = nx / npu; ldbxyzp2 = ny; ldbxyzp3 = nz / npu; ldww = ny; ldcy = ny + FFTE_NP; ldwww1 = npu; ldwww2 = nx / npu; ldwww3 = ny; ldapxyz1 = npu; ldapxyz2 = nx / npu; ldapxyz3 = ny; 
ldbzyx1 = nz / npu; ldbzyx2 = ny; lda1 = nx; lda2 = ny; nnx = nx / npu; nnz = nz / npu; nn = (s64Int_t)nx * ny * nz / npu; #ifdef _OPENMP #pragma omp for private(i,k,l,ii,kk,tmin1,tmin2) #endif for (j = 0; j < ny; ++j) { for (ii = 0; ii < nnx; ii += FFTE_NBLK) { for (kk = 0; kk < nz; kk += FFTE_NBLK) { V3MIN( tmin1, ii + FFTE_NBLK, nnx ); for (i = ii; i < tmin1; ++i) { V3MIN( tmin2, kk + FFTE_NBLK, nz ); for (k = kk; k < tmin2; ++k) { c_assgn( ARR2D( cz, k, i-ii, ldcz ), ARR3D( a2, i, j, k, lda2_1, lda2_2 ) ); } } } V3MIN( tmin2, ii + FFTE_NBLK, nnx ); for (i = ii; i < tmin2; ++i) HPCC_fft235( PTR2D( cz, 0, i-ii, ldcz ), d, wz, nz, lnz ); for (l = 0; l < npu; ++l) { for (k = 0; k < nnz; ++k) { /* reusing tmin2 from above */ for (i = ii; i < tmin2; ++i) { c_assgn( ARR4D( axyzp, i, j, k, l, ldaxyzp1, ldaxyzp2, ldaxyzp3 ), ARR2D( cz, l + k*npu, i-ii, ldcz ) ); } } } } } #ifdef _OPENMP #pragma omp single { #endif p->timings[3] = MPI_Wtime(); pztrans( a, b, nn, p, npu ); p->timings[4] = MPI_Wtime(); #ifdef _OPENMP } #endif #ifdef _OPENMP #pragma omp for private(i,j,l,ii,jj,kk,tmin1,tmin2) #endif for (k = 0; k < nnz; ++k) { for (l = 0; l < npu; ++l) { for (ii = 0; ii < nnx; ii += FFTE_NBLK) { for (jj = 0; jj < ny; jj += FFTE_NBLK) { V3MIN( tmin1, ii + FFTE_NBLK, nnx ); for (i = ii; i < tmin1; ++i) { V3MIN( tmin2, jj + FFTE_NBLK, ny ); for (j = jj; j < tmin2; ++j) { c_assgn( ztmp1, ARR4D( bxyzp, i, j, k, l, ldbxyzp1, ldbxyzp2, ldbxyzp3 ) ); c_assgn( ztmp2, ARR2D( ww, j, k, ldww ) ); c_mul3v(ztmp3, ztmp1, ztmp2); c_assgn( ARR2D( cy, j, i-ii, ldcy ), ztmp3 ); } } } V3MIN( tmin1, ii + FFTE_NBLK, nnx ); for (i = ii; i < tmin1; ++i) HPCC_fft235( PTR2D( cy, 0, i-ii, ldcy ), d, wy, ny, lny ); for (j = 0; j < ny; ++j) { V3MIN( tmin1, ii + FFTE_NBLK, nnx ); for (i = ii; i < tmin1; ++i) { c_assgn( ztmp1, ARR2D( cy, j, i-ii, ldcy ) ); c_assgn( ztmp2, ARR4D( www, l, i, j, k, ldwww1, ldwww2, ldwww3 ) ); c_mul3v(ztmp3, ztmp1, ztmp2); c_assgn( ARR4D( apxyz, l, i, j, k, ldapxyz1, 
ldapxyz2, ldapxyz3 ), ztmp3 ); } } } } for (j = 0; j < ny; ++j) HPCC_fft235( PTR3D( a, 0, j, k, lda1, lda2 ), d, wx, nx, lnx ); } #ifdef _OPENMP #pragma omp for private(i,j,k,jj,kk,tmin1,tmin2,tmin3) #endif for (ii = 0; ii < nx; ii += FFTE_NBLK) { for (jj = 0; jj < ny; jj += FFTE_NBLK) { for (kk = 0; kk < nnz; kk += FFTE_NBLK) { V3MIN( tmin1, ii + FFTE_NBLK, nx ); for (i = ii; i < tmin1; ++i) { V3MIN( tmin2, jj + FFTE_NBLK, ny ); for (j = jj; j < tmin2; ++j) { V3MIN( tmin3, kk + FFTE_NBLK, nnz ); for (k = kk; k < tmin3; ++k) { c_assgn( ARR3D( bzyx, k, j, i, ldbzyx1, ldbzyx2 ), ARR3D( a, i, j, k, lda1, lda2 ) ); } } } } } } } /* pzfft1d0 */ static void psettbl2(fftw_complex *w, int ny, int nz, int me, int npu) { int j, k; int ldw; double pi2, px; int tmin1; ldw = ny; pi2 = 8.0 * atan(1.0); px = -pi2 / ((double)ny * nz); tmin1 = nz / npu; #ifdef _OPENMP #pragma omp parallel for private(j) #endif for (k = 0; k < tmin1; ++k) for (j = 0; j < ny; ++j) { c_re( ARR2D( w, j, k, ldw ) ) = cos(px * j * (me + (double)k * npu)); c_im( ARR2D( w, j, k, ldw ) ) = sin(px * j * (me + (double)k * npu)); } } /* psettbl2 */ static void psettbl3(fftw_complex *w, int nx, int ny, int nz, int me, int npu) { int i, j, k; int ldw1, ldw2; int tmin1; double pi2, px; ldw1 = nx; ldw2 = ny; pi2 = 8.0 * atan(1.0); px = -pi2 / ((double)nx * ny * nz); tmin1 = nz / npu; #ifdef _OPENMP #pragma omp parallel for private(i,j) #endif for (k = 0; k < tmin1; ++k) for (j = 0; j < ny; ++j) for (i = 0; i < nx; ++i) { c_re( ARR3D( w, i, j, k, ldw1, ldw2 ) ) = cos( px * i * (me + (double)k * npu + (double)j * nz)); c_im( ARR3D( w, i, j, k, ldw1, ldw2 ) ) = sin( px * i * (me + (double)k * npu + (double)j * nz)); } } /* psettbl3 */ int HPCC_pzfft1d(s64Int_t n, fftw_complex *a, fftw_complex *b, fftw_complex *w, int me, int npu, int iopt, hpcc_fftw_mpi_plan p) { int ip[3], lnx[3], lny[3], lnz[3], lnpu[3]; s64Int_t nn; int i, inn, nn2, nd, nx, ny, nz; fftw_complex *wx, *wy, *wz, *c; double dn; p->timings[0] = 
MPI_Wtime(); wx = p->wx; wy = p->wy; wz = p->wz; c = p->c; nn = n / npu; inn = (int)nn; nn2 = nn / npu; HPCC_factor235( npu, lnpu ); HPCC_factor235_8( n, ip ); for (i = 0; i < 3; ++i) { EMAX( lnz[i], lnpu[i], (ip[i]+1)/3 ); EMAX( lnx[i], lnpu[i], (ip[i]-lnz[i]+1)/2 ); lny[i] = ip[i] - lnx[i] - lnz[i]; } nx = HPCC_ipow( 2, lnx[0] ) * HPCC_ipow( 3, lnx[1] ) * HPCC_ipow( 5, lnx[2] ); ny = HPCC_ipow( 2, lny[0] ) * HPCC_ipow( 3, lny[1] ) * HPCC_ipow( 5, lny[2] ); nz = HPCC_ipow( 2, lnz[0] ) * HPCC_ipow( 3, lnz[1] ) * HPCC_ipow( 5, lnz[2] ); if (0 == iopt) { HPCC_settbl( wx, nx ); HPCC_settbl( wy, ny ); HPCC_settbl( wz, nz ); psettbl2( w, ny, nz, me, npu ); psettbl3( w + ny * (nz / npu), nx, ny, nz, me, npu ); return 0; } if (1 == iopt || 2 == iopt) { for (i = 0; i < inn; ++i) { c_im( a[i] ) = -c_im( a[i] ); } } p->timings[1] = MPI_Wtime(); if (-1 == iopt || 1 == iopt || -2 == iopt) { ztrans( a, b, npu, nn2 ); pztrans( b, a, nn, p, npu ); } p->timings[2] = MPI_Wtime(); nd = ((ny > nz ? ny : nz) + FFTE_NP) * FFTE_NBLK + FFTE_NP; #ifdef _OPENMP #pragma omp parallel private(c,i) { i = omp_get_thread_num(); c = p->c + i*p->c_size; #endif pzfft1d0( a, a, a, a, b, b, b, c, c, c + nd, wx, wy, wz, w, w + ny*(nz/npu), nx, ny, nz, p, npu, lnx, lny, lnz ); #ifdef _OPENMP } #endif p->timings[5] = MPI_Wtime(); if (-1 == iopt || 1 == iopt || 2 == iopt) { pztrans( b, a, nn, p, npu ); ztrans( a, b, nn2, npu ); } p->timings[6] = MPI_Wtime(); if (1 == iopt || 2 == iopt) { dn = 1.0 / n; for (i = 0; i < inn; ++i) { c_re( b[i] ) *= dn; c_im( b[i] ) *= -dn; } } p->timings[7] = MPI_Wtime(); return 0; } /* HPCC_pzfft1d */ hpcc-1.4.1/FFT/tstfft.c0000644000000000000000000000677211256503657011444 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* tstfft.c */ #include #include "hpccfft.h" static int TestFFT1(HPCC_Params *params, int doIO, FILE *outFile, double *UGflops, int *Un, int *Ufailure) { fftw_complex *in, *out; fftw_plan p; 
hpcc_fftw_plan ip; double Gflops = -1.0; double maxErr, tmp1, tmp2, tmp3, t0, t1, t2, t3; int i, n, flags, failure = 1; double deps = HPL_dlamch( HPL_MACH_EPS ); #ifdef HPCC_FFT_235 int f[3]; /* Need 2 vectors */ n = HPCC_LocalVectorSize( params, 2, sizeof(fftw_complex), 0 ); /* Adjust local size for factors */ for ( ; HPCC_factor235( n, f ); n--) ; /* EMPTY */ #else /* Need 2 vectors and vectors' sizes as power of 2 */ n = HPCC_LocalVectorSize( params, 2, sizeof(fftw_complex), 1 ); #endif /* need to use fftw_malloc() so that the returned pointers will be aligned properly for SSE instructions on Intel/AMD systems */ in = (fftw_complex *)HPCC_fftw_malloc( (sizeof *in) * n ); out = (fftw_complex *)HPCC_fftw_malloc( (sizeof *out) * n ); if (! in || ! out) goto comp_end; /* Make sure that `inout' and `work' are initialized in parallel if using Open MP: this will ensure better placement of pages if first-touch policy is used by a distrubuted shared memory machine. */ #ifdef _OPENMP #pragma omp parallel for for (i = 0; i < n; ++i) { c_re( in[i] ) = c_re( out[i] ) = 0.0; c_re( in[i] ) = c_im( out[i] ) = 0.0; } #endif t0 = -MPI_Wtime(); HPCC_bcnrand( 2*n, 0, in ); t0 += MPI_Wtime(); #ifdef HPCC_FFTW_ESTIMATE flags = FFTW_ESTIMATE; #else flags = FFTW_MEASURE; #endif t1 = -MPI_Wtime(); p = fftw_create_plan( n, FFTW_FORWARD, flags ); t1 += MPI_Wtime(); if (! p) goto comp_end; t2 = -MPI_Wtime(); fftw_one( p, in, out ); t2 += MPI_Wtime(); fftw_destroy_plan(p); ip = HPCC_fftw_create_plan( n, FFTW_BACKWARD, FFTW_ESTIMATE ); if (ip) { t3 = -MPI_Wtime(); HPCC_fftw_one( ip, out, in ); t3 += MPI_Wtime(); HPCC_fftw_destroy_plan( ip ); } HPCC_bcnrand( 2*n, 0, out ); /* regenerate data */ maxErr = 0.0; for (i = 0; i < n; i++) { tmp1 = c_re( in[i] ) - c_re( out[i] ); tmp2 = c_im( in[i] ) - c_im( out[i] ); tmp3 = sqrt( tmp1*tmp1 + tmp2*tmp2 ); maxErr = maxErr >= tmp3 ? 
maxErr : tmp3; } if (maxErr / log(n) / deps < params->test.thrsh) failure = 0; if (doIO) { fprintf( outFile, "Vector size: %d\n", n ); fprintf( outFile, "Generation time: %9.3f\n", t0 ); fprintf( outFile, "Tuning: %9.3f\n", t1 ); fprintf( outFile, "Computing: %9.3f\n", t2 ); fprintf( outFile, "Inverse FFT: %9.3f\n", t3 ); fprintf( outFile, "max(|x-x0|): %9.3e\n", maxErr ); } if (t2 > 0.0) Gflops = 1e-9 * (5.0 * n * log(n) / log(2.0)) / t2; comp_end: if (out) HPCC_fftw_free( out ); if (in) HPCC_fftw_free( in ); *UGflops = Gflops; *Un = n; *Ufailure = failure; return 0; } int HPCC_TestFFT(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure) { int rv, n, failure = 1; double Gflops; FILE *outFile; if (doIO) { outFile = fopen( params->outFname, "a" ); if (! outFile) { outFile = stderr; fprintf( outFile, "Cannot open output file.\n" ); return 1; } } n = 0; Gflops = -1.0; rv = TestFFT1( params, doIO, outFile, &Gflops, &n, &failure ); if (doIO) { fflush( outFile ); fclose( outFile ); } if (UGflops) *UGflops = Gflops; if (Un) *Un = n; if (Ufailure) *Ufailure = failure; return rv; } hpcc-1.4.1/FFT/wrapfftw.c0000644000000000000000000000375611367557031011767 00000000000000 #include #include #include "hpccfft.h" #ifdef _OPENMP #include #endif hpcc_fftw_plan HPCC_fftw_create_plan(int n, fftw_direction dir, int flags) { hpcc_fftw_plan p; fftw_complex *a = NULL, *b = NULL; p = (hpcc_fftw_plan)fftw_malloc( sizeof *p ); if (! 
p) return p; p->w1 = (fftw_complex *)fftw_malloc( (FFTE_NDA2/2 + FFTE_NP) * (sizeof *p->w1) ); p->w2 = (fftw_complex *)fftw_malloc( (FFTE_NDA2/2 + FFTE_NP) * (sizeof *p->w2) ); p->ww = (fftw_complex *)fftw_malloc( ((FFTE_NDA2+FFTE_NP) * 4 + FFTE_NP) * (sizeof *p->ww) ); p->c_size = (FFTE_NDA2+FFTE_NP) * (FFTE_NBLK + 1) + FFTE_NP; #ifdef _OPENMP #pragma omp parallel { #pragma omp single { int i; i = omp_get_num_threads(); p->c = (fftw_complex *)fftw_malloc( p->c_size * (sizeof *p->c) * i ); } } #else p->c = (fftw_complex *)fftw_malloc( p->c_size * (sizeof *p->c) ); #endif if (! p->w1 || ! p->w2 || ! p->ww || ! p->c) { if (p->c) fftw_free( p->c ); if (p->ww) fftw_free( p->ww ); if (p->w2) fftw_free( p->w2 ); if (p->w1) fftw_free( p->w1 ); fftw_free( p ); return NULL; } HPCC_zfft1d( n, a, b, 0, p ); p->n = n; p->dir = dir; p->flags = flags; return p; } void HPCC_fftw_destroy_plan(hpcc_fftw_plan p) { if (! p) return; fftw_free( p->c ); fftw_free( p->ww ); fftw_free( p->w2 ); fftw_free( p->w1 ); fftw_free( p ); } /* Without additional storage of size p->n there is no way to preserve FFTW 2 semantics (the `in' vector is not modified). But it doesn't matter for the calling code: it doesn't rely on this semantics. The change in semantics occured while going from FFTE 3.3 to FFTE 4.0. */ void HPCC_fftw_one(hpcc_fftw_plan p, fftw_complex *in, fftw_complex *out) { int i, n; if (FFTW_FORWARD == p->dir) HPCC_zfft1d( p->n, in, out, -1, p ); else HPCC_zfft1d( p->n, in, out, +1, p ); n = p->n; /* Copy the transform to `out' vector. 
*/ for (i = 0; i < n; ++i) { c_assgn( out[i], in[i] ); } } hpcc-1.4.1/FFT/wrapfftw.h0000644000000000000000000000221511256503657011763 00000000000000 #ifdef USING_FFTW #include #else typedef double fftw_real; typedef struct { fftw_real re, im; } fftw_complex_orig; typedef fftw_real HPCC_Complex[2]; typedef HPCC_Complex fftw_complex; typedef enum { FFTW_FORWARD = -1, FFTW_BACKWARD = 1 } fftw_direction; #endif struct hpcc_fftw_plan_struct { fftw_complex *w1, *w2, *ww, *c; int n, c_size; int flags; fftw_direction dir; }; typedef struct hpcc_fftw_plan_struct *hpcc_fftw_plan; extern hpcc_fftw_plan HPCC_fftw_create_plan(int n, fftw_direction dir, int flags); extern void HPCC_fftw_destroy_plan(hpcc_fftw_plan plan); extern void HPCC_fftw_one(hpcc_fftw_plan plan, fftw_complex *in, fftw_complex *out); #ifndef USING_FFTW typedef struct hpcc_fftw_plan_struct *fftw_plan; #define c_re(c) ((c)[0]) #define c_im(c) ((c)[1]) #define fftw_malloc malloc #define fftw_free free /* flags for the planner */ #define FFTW_ESTIMATE (0) #define FFTW_MEASURE (1) #define FFTW_OUT_OF_PLACE (0) #define FFTW_IN_PLACE (8) #define FFTW_USE_WISDOM (16) #define fftw_create_plan HPCC_fftw_create_plan #define fftw_destroy_plan HPCC_fftw_destroy_plan #define fftw_one HPCC_fftw_one #endif hpcc-1.4.1/FFT/wrapmpifftw.c0000644000000000000000000000753511256503657012476 00000000000000 #include #include #include #include "hpccfft.h" #include "wrapmpifftw.h" #define Mmax3( a_, b_, c_ ) ( (a_) > (b_) ? ((a_) > (c_) ? (a_) : (c_)) : ((b_) > (c_) ? 
(b_) : (c_)) ) static int GetNXYZ(s64Int_t n, int npu) { int ip[3], lnx[3], lny[3], lnz[3], lnpu[3]; int i, nx, ny, nz, nxyz; HPCC_factor235( npu, lnpu ); HPCC_factor235_8( n, ip ); for (i = 0; i < 3; ++i) { EMAX( lnz[i], lnpu[i], (ip[i]+1)/3 ); EMAX( lnx[i], lnpu[i], (ip[i]-lnz[i]+1)/2 ); lny[i] = ip[i] - lnx[i] - lnz[i]; } nx = HPCC_ipow( 2, lnx[0] ) * HPCC_ipow( 3, lnx[1] ) * HPCC_ipow( 5, lnx[2] ); ny = HPCC_ipow( 2, lny[0] ) * HPCC_ipow( 3, lny[1] ) * HPCC_ipow( 5, lny[2] ); nz = HPCC_ipow( 2, lnz[0] ) * HPCC_ipow( 3, lnz[1] ) * HPCC_ipow( 5, lnz[2] ); nxyz = Mmax3( nx, ny, nz ); return nxyz; } hpcc_fftw_mpi_plan HPCC_fftw_mpi_create_plan(MPI_Comm comm, s64Int_t n, fftw_direction dir, int flags) { hpcc_fftw_mpi_plan p; fftw_complex *a = NULL, *b = NULL; int nxyz; int rank, size; MPI_Comm_size( comm, &size ); MPI_Comm_rank( comm, &rank ); p = (hpcc_fftw_mpi_plan)fftw_malloc( sizeof *p ); if (! p) return p; nxyz = GetNXYZ( n, size ); p->wx = (fftw_complex *)HPCC_fftw_malloc( (nxyz/2 + FFTE_NP) * (sizeof *p->wx) ); p->wy = (fftw_complex *)HPCC_fftw_malloc( (nxyz/2 + FFTE_NP) * (sizeof *p->wy) ); p->wz = (fftw_complex *)HPCC_fftw_malloc( (nxyz/2 + FFTE_NP) * (sizeof *p->wz) ); p->work = (fftw_complex *)HPCC_fftw_malloc( n / size * 3 / 2 * (sizeof *p->work) ); p->c_size = (nxyz+FFTE_NP) * (FFTE_NBLK + 1) + FFTE_NP; #ifdef _OPENMP #pragma omp parallel { #pragma omp single { int i; i = omp_get_num_threads(); p->c = (fftw_complex *)HPCC_fftw_malloc( p->c_size * (sizeof *p->c) * i ); } } #else p->c = (fftw_complex *)HPCC_fftw_malloc( p->c_size * (sizeof *p->c) ); #endif if (! p->wx || ! p->wy || ! p->wz || ! p->work || ! 
p->c) { if (p->c) HPCC_fftw_free( p->c ); if (p->work) HPCC_fftw_free( p->work ); if (p->wz) HPCC_fftw_free( p->wz ); if (p->wy) HPCC_fftw_free( p->wy ); if (p->wx) HPCC_fftw_free( p->wx ); fftw_free( p ); return NULL; } p->n = n; p->comm = comm; p->dir = dir; p->flags = flags; MPI_Type_contiguous( 2, MPI_DOUBLE, &p->cmplx ); MPI_Type_commit( &p->cmplx ); if (FFTW_FORWARD == p->dir) p->timings = HPCC_fft_timings_forward; else p->timings = HPCC_fft_timings_backward; HPCC_pzfft1d( n, a, b, p->work, rank, size, 0, p ); return p; } void HPCC_fftw_mpi_destroy_plan(hpcc_fftw_mpi_plan p) { if (!p) return; MPI_Type_free( &p->cmplx ); HPCC_fftw_free( p->work ); HPCC_fftw_free( p->c ); HPCC_fftw_free( p->wz ); HPCC_fftw_free( p->wy ); HPCC_fftw_free( p->wx ); fftw_free( p ); } void HPCC_fftw_mpi(hpcc_fftw_mpi_plan p, int n_fields, fftw_complex *local_data, fftw_complex *work){ int rank, size; s64Int_t n; int i, ln; MPI_Comm_size( p->comm, &size ); MPI_Comm_rank( p->comm, &rank ); n = p->n; if (FFTW_FORWARD == p->dir) HPCC_pzfft1d( n, local_data, work, p->work, rank, size, -1, p ); else HPCC_pzfft1d( n, local_data, work, p->work, rank, size, +1, p ); ln = n / size; for (i = 0; i < ln; ++i) { c_assgn( local_data[i], work[i] ); } } void HPCC_fftw_mpi_local_sizes(hpcc_fftw_mpi_plan p, s64Int_t *local_n, s64Int_t *local_start, s64Int_t *local_n_after_transform, s64Int_t *local_start_after_transform, s64Int_t *total_local_size) { int rank, size; s64Int_t n; MPI_Comm_size( p->comm, &size ); MPI_Comm_rank( p->comm, &rank ); n = p->n; if (local_n) *local_n = n / size; if (local_start) *local_start = n / size * rank; if (local_n_after_transform) *local_n_after_transform = n / size; if (local_start_after_transform) *local_start_after_transform = n / size * rank; if (total_local_size) *total_local_size = n / size; } hpcc-1.4.1/FFT/wrapmpifftw.h0000644000000000000000000000246511256503657012500 00000000000000#ifdef USING_FFTW #include #else #include typedef struct 
hpcc_fftw_mpi_plan_struct *fftw_mpi_plan; #define fftw_mpi_create_plan HPCC_fftw_mpi_create_plan #define fftw_mpi_destroy_plan HPCC_fftw_mpi_destroy_plan #define fftw_mpi HPCC_fftw_mpi #define fftw_mpi_local_sizes HPCC_fftw_mpi_local_sizes #endif struct hpcc_fftw_mpi_plan_struct { MPI_Comm comm; MPI_Datatype cmplx; fftw_complex *wx, *wy, *wz, *c, *work; s64Int_t n; int flags, c_size; fftw_direction dir; double *timings; }; typedef struct hpcc_fftw_mpi_plan_struct *hpcc_fftw_mpi_plan; extern hpcc_fftw_mpi_plan HPCC_fftw_mpi_create_plan(MPI_Comm comm, s64Int_t n, fftw_direction dir, int flags); extern void HPCC_fftw_mpi_destroy_plan(hpcc_fftw_mpi_plan plan); extern void HPCC_fftw_mpi(hpcc_fftw_mpi_plan p, int n_fields, fftw_complex *local_data, fftw_complex *work); extern void HPCC_fftw_mpi_local_sizes(hpcc_fftw_mpi_plan p, s64Int_t *local_n, s64Int_t *local_start, s64Int_t *local_n_after_transform, s64Int_t *local_start_after_transform, s64Int_t *total_local_size); extern int HPCC_pzfft1d(s64Int_t n, fftw_complex *a, fftw_complex *b, fftw_complex *w, int me, int npu, int iopt, hpcc_fftw_mpi_plan p); extern double *HPCC_fft_timings_forward, *HPCC_fft_timings_backward; hpcc-1.4.1/FFT/zfft1d.c0000644000000000000000000001747711256503657011334 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* C C FFTE: A FAST FOURIER TRANSFORM PACKAGE C C (C) COPYRIGHT SOFTWARE, 2000-2004, ALL RIGHTS RESERVED C BY C DAISUKE TAKAHASHI C GRADUATE SCHOOL OF SYSTEMS AND INFORMATION ENGINEERING C UNIVERSITY OF TSUKUBA C 1-1-1 TENNODAI, TSUKUBA, IBARAKI 305-8573, JAPAN C E-MAIL: daisuke@cs.tsukuba.ac.jp C C C 1-D COMPLEX FFT ROUTINE C C FORTRAN77 SOURCE PROGRAM C C CALL ZFFT1D(A,N,IOPT,B) C C A(N) IS COMPLEX INPUT/OUTPUT VECTOR (COMPLEX*16) C B(N) IS WORK VECTOR (COMPLEX*16) C N IS THE LENGTH OF THE TRANSFORMS (INTEGER*4) C ----------------------------------- C N = (2**IP) * (3**IQ) * (5**IR) C 
-----------------------------------
C IOPT = 0 FOR INITIALIZING THE COEFFICIENTS (INTEGER*4)
C = -1 FOR FORWARD TRANSFORM
C = +1 FOR INVERSE TRANSFORM
C
C WRITTEN BY DAISUKE TAKAHASHI
C
*/

#include "hpccfft.h"

#ifdef _OPENMP
#include
#endif

/*
 * Integer power x**p by repeated multiplication.
 * Special cases are decided before the loop, in this order: x equal to 0 or 1
 * returns x (so 0**0 yields 0), p == 0 returns 1, x == -1 returns -1 or 1 by
 * the parity of p, and a negative p returns 0.  There is no overflow check.
 */
int
HPCC_ipow(int x, int p) {
  int i, r;

  if (1 == x || 0 == x) return x;
  if (0 == p) return 1;
  if (-1 == x) return (p & 1) ? -1 : 1;
  if (p < 0) return 0;
  r = 1;
  for (i = 0; i < p; i++) r *= x;
  return r;
}

/*
 * Blocked two-pass kernel used by HPCC_zfft1d() for n = n1 * n2.
 *
 * First loop, per block of FFTE_NBLK values of the first index: copy the
 * block of `a1' into `c' with the two indices swapped, run HPCC_fft235() of
 * length n2 on each of the copied vectors, multiply each element by the
 * product of four table entries (ww1..ww4) and store the result in `b'.
 * Second loop, per block of FFTE_NBLK values of the second index: run
 * HPCC_fft235() of length n1 on vectors of `b' and store them into `a2' with
 * the indices swapped.
 *
 * When _OPENMP is defined the caller invokes this function inside an
 * "omp parallel" region, so the "omp for" pragmas below are orphaned
 * work-sharing constructs; `c' and `d' are per-thread scratch.
 * ARR2D/PTR2D, V2MIN, c_assgn and c_mul3v are macros defined elsewhere.
 * Always returns 0.
 */
static int
zfft1d0(fftw_complex *a1, fftw_complex *a2, fftw_complex *b, fftw_complex *c, fftw_complex *d,
  fftw_complex *w1, fftw_complex *w2, fftw_complex *ww1, fftw_complex *ww2, fftw_complex *ww3,
  fftw_complex *ww4, int n1, int n2, int m1, int m2, int *ip1, int *ip2) {
  int lda1, lda2, ldb, ldc, ldww1, ldww2, ldww3, ldww4;
  int ii, ij, ij0, ik, ir, is, jj, i, j;
  int tmin1, tmin2, itmp1;
  fftw_complex ztmp1, ztmp2, ztmp3, ztmp4;

  /* leading dimensions of the 2-D views used with ARR2D/PTR2D */
  lda1 = n1;
  lda2 = n2;
  ldb = n1;
  ldc = n2 + FFTE_NP;
  ldww1 = m1;
  ldww2 = m1;
  ldww3 = m2;
  ldww4 = n1/m1;

#ifdef _OPENMP
#pragma omp for private(ij,ij0,ir,jj,i,j,ik,is,ztmp1,ztmp2,ztmp3,ztmp4,tmin1,tmin2,itmp1)
#endif
  for (ii = 0; ii < n1; ii += FFTE_NBLK) {

    /* copy the current block of a1 into c, indices swapped */
    for (jj = 0; jj < n2; jj += FFTE_NBLK) {

      tmin1 = ii + FFTE_NBLK;
      V2MIN( tmin1, n1 );

      for (i = ii; i < tmin1; ++i) {

        tmin2 = jj + FFTE_NBLK;
        V2MIN( tmin2, n2 );
        for (j = jj; j < tmin2; ++j) {
          c_assgn( ARR2D(c, j, i-ii, ldc), ARR2D(a1, i, j, lda1) );
        }
      }
    }

    /* length-n2 transforms of the copied vectors */
    tmin1 = ii + FFTE_NBLK;
    V2MIN( tmin1, n1 );
    for (i = ii; i < tmin1; ++i)
      HPCC_fft235( PTR2D(c, 0, i-ii, ldc), d, w2, n2, ip2 );

    if (HPCC_ipow( 2, ip1[0] ) < FFTE_NBLK || HPCC_ipow( 2, ip2[0] ) < FFTE_NBLK) {
      /* general case: ir and ij are recomputed for every i */
      itmp1 = n2 / m2;
      for (is = 0; is < itmp1; ++is) {
        for (ik = 0; ik < m2; ++ik) {
          j = ik + is * m2;
          tmin1 = ii + FFTE_NBLK;
          V2MIN( tmin1, n1 );
          for (i = ii; i < tmin1; ++i) {
            ir = i / m1;
            ij = i % m1;
            /* b(i,j) = c(j,i-ii) * ww1 * ww2 * ww3 * ww4 */
            c_assgn(ztmp1, ARR2D(c, j, i-ii, ldc));
            c_assgn(ztmp2, ARR2D(ww1, ij, ik, ldww1));
            c_mul3v(ztmp3, ztmp1, ztmp2);
            c_assgn(ztmp2, ARR2D(ww2, ij, is, ldww2));
            c_mul3v(ztmp1, ztmp3, ztmp2);
            c_assgn(ztmp3, ARR2D(ww3, ik, ir, ldww3));
            c_mul3v(ztmp2, ztmp1, ztmp3);
            c_assgn(ztmp1, ARR2D(ww4, ir, is, ldww4));
            c_mul3v(ztmp3, ztmp2, ztmp1);
            c_assgn(ARR2D(b, i, j, ldb), ztmp3);
          }
        }
      }
    } else {
      /* ir is taken once per block here, so the ww3*ww4 product (ztmp4) is
         hoisted out of the innermost loop and ij is simply incremented */
      ir = ii / m1;
      ij0 = ii % m1;
      itmp1 = n2 / m2;
      for (is = 0; is < itmp1; ++is) {
        for (ik = 0; ik < m2; ++ik) {
          c_assgn(ztmp1, ARR2D(ww3, ik, ir, ldww3));
          c_assgn(ztmp2, ARR2D(ww4, ir, is, ldww4));
          c_mul3v(ztmp4, ztmp1, ztmp2);
          j = ik + is * m2;
          ij = ij0;
          tmin1 = ii + FFTE_NBLK;
          V2MIN( tmin1, n1 );
          for (i = ii; i < tmin1; ++i) {
            c_assgn(ztmp1, ARR2D(ww1, ij, ik, ldww1));
            c_assgn(ztmp2, ARR2D(ww2, ij, is, ldww2));
            c_mul3v(ztmp3, ztmp1, ztmp2);
            c_mul3v(ztmp1, ztmp3, ztmp4);
            c_assgn(ztmp2, ARR2D(c, j, i-ii, ldc));
            c_mul3v(ztmp3, ztmp2, ztmp1);
            c_assgn(ARR2D(b, i, j, ldb), ztmp3);
            ++ij;
          }
        }
      }
    }
  }

#ifdef _OPENMP
#pragma omp for private(i,j,tmin1)
#endif
  for (jj = 0; jj < n2; jj += FFTE_NBLK) {
    tmin1 = jj + FFTE_NBLK;
    V2MIN(tmin1, n2);
    /* length-n1 transforms of the vectors of b in this block */
    for (j = jj; j < tmin1; ++j) {
      HPCC_fft235( PTR2D(b, 0, j, ldb), c, w1, n1, ip1 );
    }
    /* store into a2 with the indices swapped */
    for (i = 0; i < n1; ++i)
      for (j = jj; j < tmin1; ++j) {
        c_assgn(ARR2D(a2, j, i, lda2), ARR2D(b, i, j, ldb));
      }
  }

  return 0;
}

/*
 * Fills the four tables read by zfft1d0() as ww1..ww4.  Every entry is
 * (cos(px * e), sin(px * e)) with px = -2*pi / (n1 * n2) and e equal to:
 *   w1(j,k)   : j * k               (j < m1,     k < m2)
 *   w3(k,ir)  : k * ir * m1         (k < m2,     ir < n1/m1)
 *   w2(j,is)  : j * is * m2         (j < m1,     is < n2/m2)
 *   w4(ir,is) : ir * m1 * is * m2   (ir < n1/m1, is < n2/m2)
 * With _OPENMP both loops run inside one parallel region opened here.
 * Always returns 0.
 */
static int
settbls(fftw_complex *w1, fftw_complex *w2, fftw_complex *w3, fftw_complex *w4, int n1, int n2, int m1, int m2) {
  int j, k, is, ir;
  int ldw1, ldw2, ldw3, ldw4;
  double pi2, px;

  pi2 = 8.0 * atan(1.0);   /* 2 * pi */
  px = -pi2 / n1 / n2;

  ldw1 = m1;
  ldw2 = m1;
  ldw3 = m2;
  ldw4 = n1/m1;

#ifdef _OPENMP
#pragma omp parallel
  {
#pragma omp for private(j, ir)
#endif
  for (k = 0; k < m2; ++k) {
    for (j = 0; j < m1; ++j) {
      c_re(ARR2D(w1, j, k, ldw1)) = cos(px * j * k);
      c_im(ARR2D(w1, j, k, ldw1)) = sin(px * j * k);
    }

    for (ir = 0; ir < n1/m1; ++ir) {
      c_re(ARR2D(w3, k, ir, ldw3)) = cos(px * k * ir * m1);
      c_im(ARR2D(w3, k, ir, ldw3)) = sin(px * k * ir * m1);
    }
  }

#ifdef _OPENMP
#pragma omp for private(j, ir)
#endif
  for (is = 0; is < n2/m2; ++is) {
    for (j = 0; j < m1; ++j) {
      c_re(ARR2D(w2, j, is, ldw2)) = cos(px * j * is * m2);
      c_im(ARR2D(w2, j, is, ldw2)) = sin(px * j * is * m2);
    }

    for (ir = 0; ir < n1/m1; ++ir) {
      c_re(ARR2D(w4, ir, is, ldw4)) = cos(px * ir * m1 * is * m2);
      c_im(ARR2D(w4, ir, is, ldw4)) = sin(px * ir * m1 * is * m2);
    }
  }
#ifdef _OPENMP
  }
#endif

  return 0;
} /* settbls */

/*
 * C version of the ZFFT1D routine described in the header comment above.
 *
 *   n     transform length (a product of powers of 2, 3 and 5)
 *   a     input/output vector
 *   b     work vector
 *   iopt  0: only initialize the tables in plan `p' and return;
 *         -1: forward transform; +1: inverse transform
 *   p     plan supplying the tables (w1, w2, ww) and the scratch area c
 *
 * For iopt == 1 the imaginary parts of `a' are negated before the transform
 * and `a' is conjugated and scaled by 1/n afterwards.  Small lengths use a
 * single HPCC_fft235() call; otherwise n is split into n1 * n2 and the
 * blocked kernel zfft1d0() is used.  Always returns 0.
 */
int
HPCC_zfft1d(int n, fftw_complex *a, fftw_complex *b, int iopt, hpcc_fftw_plan p) {
  int i;
  int m1, m2, n1, n2, nd, nw2, nw3, nw4;
  double dn;
  int ip[3], ip1[3], ip2[3];
  fftw_complex *w1, *w2, *ww, *c;

  w1 = p->w1;
  w2 = p->w2;
  ww = p->ww;
  c = p->c;

  HPCC_factor235( n, ip );

  if (1 == iopt)
    for (i = 0; i < n; ++i) {
      c_im( a[i] ) = -c_im( a[i] );
    }

  if (n <= FFTE_L2SIZE / 16 / 3 && n <= FFTE_NDA2) {
    if (0 == iopt) {
      HPCC_settbl( w1, n );
      return 0;
    }

    HPCC_fft235( a, b, w1, n, ip );
  } else {
    /* split every exponent between n1 (rounded up) and n2 */
    for (i = 0; i < 3; ++i) {
      ip1[i] = (ip[i] + 1) / 2;
      ip2[i] = ip[i] - ip1[i];
    }
    n1 = HPCC_ipow( 2, ip1[0] ) * HPCC_ipow( 3, ip1[1] ) * HPCC_ipow( 5, ip1[2] );
    n2 = HPCC_ipow( 2, ip2[0] ) * HPCC_ipow( 3, ip2[1] ) * HPCC_ipow( 5, ip2[2] );

    /* same condition as the branch selection inside zfft1d0() */
    if (HPCC_ipow( 2, ip1[0] ) < FFTE_NBLK || HPCC_ipow( 2, ip2[0] ) < FFTE_NBLK) {
      m1 = HPCC_ipow( 2, ip1[0] / 2 ) * HPCC_ipow( 3, ip1[1] / 2 ) * HPCC_ipow( 5, ip1[2] / 2 );
      V2MIN( m1, n1 );
      m2 = HPCC_ipow( 2, ip2[0] / 2 ) * HPCC_ipow( 3, ip2[1] / 2 ) * HPCC_ipow( 5, ip2[2] / 2 );
      V2MIN( m2, n2 );
    } else {
      m1 = HPCC_ipow( 2, ip1[0] / 2);
      m1 = FFTE_NBLK > m1 ? FFTE_NBLK : m1;
      V2MIN( m1, n1 );
      m2 = HPCC_ipow( 2, ip2[0] / 2);
      m2 = FFTE_NBLK > m2 ? FFTE_NBLK : m2;
      V2MIN( m2, n2 );
    }
    /* offsets of the 2nd, 3rd and 4th table inside ww */
    nw2 = m1 * m2 + FFTE_NP;
    nw3 = nw2 + m1 * (n2 / m2) + FFTE_NP;
    nw4 = nw3 + m2 * (n1 / m1) + FFTE_NP;

    if (0 == iopt) {
      HPCC_settbl( w1, n1 );
      HPCC_settbl( w2, n2 );
      settbls( ww, ww + nw2, ww + nw3, ww + nw4, n1, n2, m1, m2 );
      return 0;
    }

    /* offset of the second scratch vector (`d' in zfft1d0) inside c */
    nd = (n2 + FFTE_NP) * FFTE_NBLK + FFTE_NP;

#ifdef _OPENMP
#pragma omp parallel private(c,i)
    {
      /* every thread works in its own c_size-element slice of p->c */
      i = omp_get_thread_num();
      c = p->c + i*p->c_size;
#endif

    zfft1d0( a, a, b, c, c + nd, w1, w2, ww, ww + nw2, ww + nw3, ww + nw4, n1, n2, m1, m2, ip1, ip2 );

#ifdef _OPENMP
    }
#endif
  }

  if (1 == iopt) {
    dn = 1.0 / (double)n;
    for (i = 0; i < n; ++i) {
      c_re( a[i] ) *= dn;
      c_im( a[i] ) *= -dn;
    }
  }

  return 0;
} /* HPCC_zfft1d */
hpcc-1.4.1/Makefile0000644000000000000000000000042411256503657010773 00000000000000# -*- Makefile -*-

arch = UNKNOWN
include hpl/Make.$(arch)

all:
	- $(MKDIR) hpl/lib/$(arch)
	( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc )

clean:
	- $(MKDIR) hpl/lib/$(arch)
	( $(CD) hpl/lib/arch/build ; $(MAKE) arch=$(arch) -f Makefile.hpcc clean )
hpcc-1.4.1/PTRANS/cblacslt.c0000644000000000000000000005315011403763471012275 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */
/*

cblacslt.c

-- V0.0 Stripped-down BLACS routines
-- University of Tennessee, October, 2003
Written by Piotr Luszczek.

*/

#include
#include

#include "cblacslt.h"

/* debugging aid: prints file, line, the tag `i' and the value of `v' as a double */
#define DPRN(i,v) do{printf(__FILE__ "(%d)@%d:" #v "=%g\n",__LINE__,i,(double)(v));fflush(stdout);}while(0)

/* ---------------------------------------------------------------------- */

/*FIXME: what about parameter checking: context, etc? have a macro too? */
/* Entry guards: initialize the library on first use; once CblacsFinalized is
   set, warn and return from the enclosing function (CBLACS_INIT for void
   functions, CBLACS_INIT1(v) for functions returning v). */
#define CBLACS_INIT if (! CblacsInitialized) CblacsInit(); else if (CblacsFinalized) do{CblacsWarn();return;}while(0)
#define CBLACS_INIT1(v) if (!
CblacsInitialized) CblacsInit();else if (CblacsFinalized)do{CblacsWarn();return(v);}while(0) #define CblacsWarn() CblacsWarnImp( __FILE__, __LINE__ ) static int CblacsInitialized = 0, CblacsFinalized; double dcputime00(void) {return HPL_ptimer_cputime();} double dwalltime00(void) {return MPI_Wtime();} static void CblacsWarnImp(char *file, int line) { int rank; MPI_Comm_rank( MPI_COMM_WORLD, &rank ); printf( "%s(%d)@%d: CBLACS warning.\n", file, line, rank ); fflush(stdout); } static struct {MPI_Comm comm, rowComm, colComm; unsigned int taken;} CblacsComms[10]; static int CblacsNComms; #define CBLACS_CHK_CTXT(v) if (ctxt < 1 || ctxt > CblacsNComms) return v static MPI_Comm CblacsGetComm(int ctxt) { CBLACS_CHK_CTXT(MPI_COMM_NULL); return CblacsComms[ctxt - 1].comm; } static MPI_Comm CblacsGetRowComm(int ctxt) { CBLACS_CHK_CTXT(MPI_COMM_NULL); return CblacsComms[ctxt - 1].rowComm; } static MPI_Comm CblacsGetColComm(int ctxt) { CBLACS_CHK_CTXT(MPI_COMM_NULL); return CblacsComms[ctxt - 1].colComm; } static int CblacsSetComm(int ctxt, MPI_Comm comm) { CBLACS_CHK_CTXT(-1); CblacsComms[ctxt - 1].comm = comm; return 0; } static int CblacsSetRowComm(int ctxt, MPI_Comm comm) { CBLACS_CHK_CTXT(-1); CblacsComms[ctxt - 1].rowComm = comm; return 0; } static int CblacsSetColComm(int ctxt, MPI_Comm comm) { CBLACS_CHK_CTXT(-1); CblacsComms[ctxt - 1].colComm = comm; return 0; } static int CblacsNewCtxt() { int i; for (i = 1; i < CblacsNComms; i++) if (! 
CblacsComms[i].taken) { CblacsComms[i].taken = 1; return i + 1; } return 0; } static int CblacsDeleteCtxt(int *ctxtP) { int idx = *ctxtP - 1; if (idx < 1 || idx >= CblacsNComms) { CblacsWarn(); return -1; } if (0 == CblacsComms[idx].taken) { CblacsWarn(); return -1; } if (MPI_COMM_NULL != CblacsComms[idx].colComm) MPI_Comm_free( &(CblacsComms[idx].colComm) ); if (MPI_COMM_NULL != CblacsComms[idx].rowComm) MPI_Comm_free( &(CblacsComms[idx].rowComm) ); if (MPI_COMM_NULL != CblacsComms[idx].comm) MPI_Comm_free( &(CblacsComms[idx].comm) ); CblacsComms[idx].taken = 0; *ctxtP = 0; /* deleted contexts are 0 */ return 0; } /* static void * CblacsNewBuf(int count, int esize) { return malloc( count * esize ); } */ #define CblacsNewBuf(c,s) malloc((c)*(s)) #define CblacsDeleteBuf(b) free(b) static int CblacsInit() { int i, flag; if (MPI_SUCCESS != MPI_Initialized( &flag ) || ! flag) {CblacsWarn();return 1;} CblacsInitialized = 1; CblacsFinalized = 0; CblacsNComms = 10; for (i = 0; i < CblacsNComms; i++) { CblacsComms[i].comm = MPI_COMM_NULL; CblacsComms[i].rowComm = MPI_COMM_NULL; CblacsComms[i].colComm = MPI_COMM_NULL; CblacsComms[i].taken = 0; } /* FIXME: setup system context to be a cartesian grid with row and column comm's*/ CblacsComms[0].comm = MPI_COMM_WORLD; CblacsComms[0].rowComm = MPI_COMM_NULL; CblacsComms[0].colComm = MPI_COMM_NULL; CblacsComms[0].taken = 1; return 0; } void Cblacs_pinfo(int *mypnum, int *nprocs) { CBLACS_INIT; MPI_Comm_rank( MPI_COMM_WORLD, mypnum ); MPI_Comm_size( MPI_COMM_WORLD, nprocs ); } void Cblacs_exit(int NotDone) { CBLACS_INIT; CblacsFinalized = 0; if (! 
NotDone) MPI_Finalize(); } void Cblacs_abort(int ConTxt, int ErrNo) { int nprow, npcol, myrow, mycol, rank; CBLACS_INIT; MPI_Comm_rank( MPI_COMM_WORLD, &rank ); Cblacs_gridinfo(ConTxt, &nprow, &npcol, &myrow, &mycol); fprintf(stderr, "{%d,%d}, pnum=%d, Contxt=%d, killed other procs, exiting with error #%d.\n\n", myrow, mycol, rank, ConTxt, ErrNo); fflush(stderr); fflush(stdout); MPI_Abort( MPI_COMM_WORLD, ErrNo ); } void Cblacs_get(int ConTxt, int what, int *val) { CBLACS_INIT; switch (what) { case SGET_SYSCONTXT: *val = 1; break; default: *val = -1; CblacsWarn(); break; } } static int CblacsGridNew(int nprow, int npcol, int *ConTxt, MPI_Comm *comm) { int size; CBLACS_INIT1(-1); *comm = CblacsGetComm(*ConTxt); if (MPI_COMM_NULL == *comm) return -1; MPI_Comm_size( *comm, &size ); if (nprow < 1 || nprow > size) return -1; if (npcol < 1 || npcol > size) return -1; if (nprow * npcol > size) return -1; *ConTxt = CblacsNewCtxt(); return 0; } void Cblacs_gridmap(int *ConTxt, int *umap, int ldumap, int nprow, int npcol) { int i, j, np_me, npall, npwho, myrow, mycol, color, key, rv; MPI_Comm comm, newComm, rowComm, colComm; if (CblacsGridNew( nprow, npcol, ConTxt, &comm )) { CblacsWarn(); goto gmapErr; } Cblacs_pinfo( &np_me, &npall ); myrow = mycol = -1; color = MPI_UNDEFINED; key = 0; for (i = 0; i < nprow; ++i) for (j = 0; j < npcol; ++j) { npwho = umap[j + i * ldumap]; if (np_me == npwho) { color = 0; key = j + i * npcol; myrow = i; mycol = j; goto gmapFound; } } gmapFound: /* communicator of all grid processes */ rv = MPI_Comm_split( comm, color, key, &newComm ); if (MPI_SUCCESS != rv) { /* make contexts for non-participating processes a 0 value so gridinfo() works correctly */ CblacsDeleteCtxt( ConTxt ); goto gmapErr; } CblacsSetComm( *ConTxt, newComm ); if (MPI_COMM_NULL == newComm) { /* this process does not participate in this grid */ CblacsDeleteCtxt( ConTxt ); return; } /* row communicator */ rv = MPI_Comm_split( newComm, myrow, mycol, &rowComm ); if (MPI_SUCCESS 
!= rv) { CblacsDeleteCtxt( ConTxt ); goto gmapErr; } CblacsSetRowComm( *ConTxt, rowComm ); /* column communicator */ rv = MPI_Comm_split( newComm, mycol, myrow, &colComm ); if (MPI_SUCCESS != rv) { CblacsDeleteCtxt( ConTxt ); goto gmapErr; } CblacsSetColComm( *ConTxt, colComm ); return; gmapErr: *ConTxt = 0; CblacsWarn(); return; } void Cblacs_gridexit(int ConTxt) { CBLACS_INIT; CblacsDeleteCtxt( &ConTxt ); } void Cblacs_gridinfo(int ConTxt, int *nprow, int *npcol, int *myrow, int *mycol) { MPI_Comm comm; CBLACS_INIT; comm = CblacsGetComm( ConTxt ); /* deleted contexts (or the contexts for non-participating processes) are 0 */ if (MPI_COMM_NULL == comm) { *nprow = *npcol = *myrow = *mycol = -1; } else { MPI_Comm_size( CblacsGetRowComm(ConTxt), npcol ); MPI_Comm_rank( CblacsGetRowComm(ConTxt), mycol ); MPI_Comm_size( CblacsGetColComm(ConTxt), nprow ); MPI_Comm_rank( CblacsGetColComm(ConTxt), myrow ); } } /* ---------------------------------------------------------------------- */ /* Communication routines */ void Cblacs_barrier(int ConTxt, char *scope) { MPI_Comm comm; CBLACS_INIT; switch (*scope) { case 'A': case 'a': comm = CblacsGetComm( ConTxt ); break; case 'C': case 'c': comm = CblacsGetColComm( ConTxt ); break; case 'R': case 'r': comm = CblacsGetRowComm( ConTxt ); break; default: comm = MPI_COMM_NULL; CblacsWarn(); break; } if (MPI_COMM_NULL == comm) { CblacsWarn(); return; } MPI_Barrier( comm ); } static void Cvgred2d(int ConTxt, char *scope, int m, int n, void *A, int lda, int rowRank, int colRank, MPI_Datatype dtype, int dsize, MPI_Op op) { int j, rank, root, count, coords[2], dest_rank, npcol; void *sbuf, *rbuf; MPI_Comm comm; /* if the answer should be left on all processes */ if (-1 == rowRank || -1 == colRank) root = 0; else root = 1; switch (*scope) { case 'A': case 'a': comm = CblacsGetComm( ConTxt ); coords[0] = rowRank; coords[1] = colRank; MPI_Comm_size( CblacsGetRowComm( ConTxt ), &npcol ); dest_rank = colRank + rowRank * npcol; break; case 'C': 
case 'c': comm = CblacsGetColComm( ConTxt ); coords[0] = rowRank; dest_rank = rowRank; break; case 'R': case 'r': comm = CblacsGetRowComm( ConTxt ); coords[0] = colRank; dest_rank = colRank; break; default: comm = MPI_COMM_NULL; CblacsWarn(); break; } if (MPI_COMM_NULL == comm) { CblacsWarn(); return; } /* if not leave-on-all then get rank of the destination */ if (root) root = dest_rank; /* MPI_Cart_rank( comm, coords, &root ); */ else root = MPI_PROC_NULL; /* FIXME: what if contiguous buffer cannot be allocated */ count = m * n; if (m == lda || n == 1) sbuf = A; /* A is contiguous, reuse it */ else { /* a new data type could be created to reflect layout of `A' but then the * receiving buffer would have to be the same, and if `lda' is large in * comparison to `m' then it might be unfeasible */ sbuf = CblacsNewBuf( count, dsize ); for (j = 0; j < n; j++) memcpy( (char *)sbuf + j * m * dsize, (char *)A + j * lda * dsize, m * dsize ); } rbuf = CblacsNewBuf( count, dsize ); if (MPI_PROC_NULL == root) { MPI_Allreduce( sbuf, rbuf, count, dtype, op, comm ); } else { MPI_Reduce( sbuf, rbuf, count, dtype, op, root, comm ); MPI_Comm_rank( comm, &rank ); } if (MPI_PROC_NULL == root || root == rank) { if (A == sbuf) memcpy( A, rbuf, count * dsize ); /* A is contiguous */ else { for (j = 0; j < n; j++) memcpy( (char *)A + j * lda * dsize, (char *)rbuf + j * m * dsize, m * dsize ); } } CblacsDeleteBuf( rbuf ); if (sbuf != A) CblacsDeleteBuf( sbuf ); } /* * Purpose * * Combine sum operation for double precision rectangular matrices. * * Arguments * * ConTxt (input) int * Index into MyConTxts00 (my contexts array). * * scope (input) Ptr to char * Limit the scope of the operation. * = 'R' : Operation is performed by a process row * = 'C' : Operation is performed by a process column. * = 'A' : Operation is performed by all processes in grid. 
* If both `rdest' and `cdest' are not -1 then for 'R' scope `rdest' is ignored and for `C' - `cdest' * is ignored (row or column of the scope are used, respectively). * * top (input) Ptr to char * Controls fashion in which messages flow within the operation. * * m (input) int * The number of rows of the matrix A. m >= 0. * * n (input) int * The number of columns of the matrix A. n >= 0. * * A (output) Ptr to double precision two dimensional array * The m by n matrix A. Fortran 77 (column-major) storage * assumed. * * lda (input) int * The leading dimension of the array A. lda >= m. * * rdest (input) int * The process row of the destination of the sum. * If rdest == -1, then result is left on all processes in scope. * * cdest (input) int * The process column of the destination of the sum. * If cdest == -1, then result is left on all processes in scope. */ void Cdgsum2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rdest, int cdest){ CBLACS_INIT; top = top; /* user `top'ology is ignored */ Cvgred2d( ConTxt, scope, m, n, A, lda, rdest, cdest, MPI_DOUBLE, sizeof(double), MPI_SUM ); } void Cigsum2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rdest, int cdest){ CBLACS_INIT; top = top; /* user `top'ology is ignored */ Cvgred2d( ConTxt, scope, m, n, A, lda, rdest, cdest, MPI_INT, sizeof(int), MPI_SUM ); } void CblacsAbsMax(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype) { int i, n = *len; double *dinvec, *dinoutvec; if (MPI_DOUBLE == *datatype) { dinvec = (double *)invec; dinoutvec = (double *)inoutvec; for (i = n; i; i--, dinvec++, dinoutvec++) if (fabs(*dinvec) > fabs(*dinoutvec)) *dinoutvec = *dinvec; } else CblacsWarn(); } void CblacsAbsMin(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype) { int i, n = *len; double *dinvec, *dinoutvec; if (MPI_DOUBLE == *datatype) { dinvec = (double *)invec; dinoutvec = (double *)inoutvec; for (i = n; i; i--, dinvec++, dinoutvec++) if (fabs(*dinvec) < 
fabs(*dinoutvec)) *dinoutvec = *dinvec; } else CblacsWarn(); } /* * Purpose * * Combine amx operation for double precision rectangular matrices. * * Arguments * * ConTxt (input) Ptr to int * Index into MyConTxts00 (my contexts array). * * SCOPE (input) Ptr to char * Limit the scope of the operation. * = 'R' : Operation is performed by a process row. * = 'C' : Operation is performed by a process column. * = 'A' : Operation is performed by all processes in grid. * * TOP (input) Ptr to char * Controls fashion in which messages flow within the operation. * * M (input) Ptr to int * The number of rows of the matrix A. M >= 0. * * N (input) Ptr to int * The number of columns of the matrix A. N >= 0. * * A (output) Ptr to double precision two dimensional array * The m by n matrix A. Fortran77 (column-major) storage * assumed. * * LDA (input) Ptr to int * The leading dimension of the array A. LDA >= M. * * RA (output) Integer Array, dimension (LDIA, N) * Contains process row that the amx of each element * of A was found on: i.e., rA(1,2) contains the process * row that the amx of A(1,2) was found on. * Values are left on process {rdest, cdest} only, others * may be modified, but not left with interesting data. * If rdest == -1, then result is left on all processes in scope. * If LDIA == -1, this array is not accessed, and need not exist. * * CA (output) Integer Array, dimension (LDIA, N) * Contains process column that the amx of each element * of A was found on: i.e., cA(1,2) contains the process * column that the max/min of A(1,2) was found on. * Values are left on process {rdest, cdest} only, others * may be modified, but not left with interesting data. * If rdest == -1, then result is left on all processes in scope. * If LDIA == -1, this array is not accessed, and need not exist. * * LDIA (input) Ptr to int * If (LDIA == -1), then the arrays RA and CA are not accessed. * ELSE leading dimension of the arrays RA and CA. LDIA >= M. 
* * RDEST (input) Ptr to int * The process row of the destination of the amx. * If rdest == -1, then result is left on all processes in scope. * * CDEST (input) Ptr to int * The process column of the destination of the amx. * If rdest == -1, then CDEST ignored. */ void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA, int *cA, int ldia, int rdest, int cdest) { MPI_Op op; CBLACS_INIT; if (ldia > 0) {CblacsWarn(); rA = cA; return;} /* no AMAX_LOC yet */ MPI_Op_create( CblacsAbsMax, 1, &op ); top = top; /* user `top'ology is ignored */ Cvgred2d( ConTxt, scope, m, n, A, lda, rdest, cdest, MPI_DOUBLE, sizeof(double), op ); MPI_Op_free( &op ); } void Cdgamn2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA, int *cA, int ldia, int rdest, int cdest) { MPI_Op op; CBLACS_INIT; if (ldia > 0) {CblacsWarn(); rA = cA; return;} /* no AMAX_LOC yet */ MPI_Op_create( CblacsAbsMin, 1, &op ); top = top; /* user `top'ology is ignored */ Cvgred2d( ConTxt, scope, m, n, A, lda, rdest, cdest, MPI_DOUBLE, sizeof(double), op ); MPI_Op_free( &op ); } void Cblacs_dSendrecv(int ctxt, int mSrc, int nSrc, double *Asrc, int ldaSrc, int rdest, int cdest, int mDest, int nDest, double *Adest, int ldaDest, int rsrc, int csrc) { MPI_Comm comm, rowComm; MPI_Datatype typeSrc, typeDest; MPI_Status stat; int src, dest, dataIsContiguousSrc, dataIsContiguousDest, countSrc, countDest, npcol; CBLACS_INIT; comm = CblacsGetComm( ctxt ); if (MPI_COMM_NULL == comm) {CblacsWarn(); return;} if (mSrc == ldaSrc || 1 == nSrc) { dataIsContiguousSrc = 1; countSrc = mSrc * nSrc; typeSrc = MPI_DOUBLE; } else { dataIsContiguousSrc = 0; countSrc = 1; MPI_Type_vector( nSrc, mSrc, ldaSrc, MPI_DOUBLE, &typeSrc ); MPI_Type_commit( &typeSrc ); } if (mDest == ldaDest || 1 == nDest) { dataIsContiguousDest = 1; countDest = mDest * nDest; typeDest = MPI_DOUBLE; } else { dataIsContiguousDest = 0; countDest = 1; MPI_Type_vector( nDest, mDest, ldaDest, MPI_DOUBLE, 
&typeDest ); MPI_Type_commit( &typeDest ); } rowComm = CblacsGetRowComm( ctxt ); MPI_Comm_size( rowComm, &npcol ); dest = cdest + rdest * npcol; src = csrc + rsrc * npcol; MPI_Sendrecv( Asrc, countSrc, typeSrc, dest, 0, Adest, countDest, typeDest, src, 0, comm, &stat ); /* IBM's (old ?) MPI doesn't have: MPI_STATUS_IGNORE */ if (! dataIsContiguousSrc) MPI_Type_free( &typeSrc ); if (! dataIsContiguousDest) MPI_Type_free( &typeDest ); } static void CblacsBcast(int ConTxt, char *scope, int m, int n, void *A, int lda, int rowRank, int colRank, MPI_Datatype baseType){ MPI_Comm comm; MPI_Datatype type; int root, coords[2], dest_rank, npcol; /* if this process is the root of broadcast */ if (-1 == rowRank || -1 == colRank) root = 0; else root = 1; switch (*scope) { case 'A': case 'a': comm = CblacsGetComm( ConTxt ); coords[0] = rowRank; coords[1] = colRank; MPI_Comm_size( CblacsGetRowComm( ConTxt ), &npcol ); dest_rank = colRank + rowRank * npcol; break; case 'C': case 'c': comm = CblacsGetColComm( ConTxt ); coords[0] = rowRank; dest_rank = rowRank; break; case 'R': case 'r': comm = CblacsGetRowComm( ConTxt ); coords[0] = colRank; dest_rank = colRank; break; default: comm = MPI_COMM_NULL; CblacsWarn(); break; } if (MPI_COMM_NULL == comm) { CblacsWarn(); return; } if (MPI_COMM_NULL == comm) { CblacsWarn(); return; } /* if broadcast/receive */ if (root) root = dest_rank; /* MPI_Cart_rank( comm, coords, &root ); */ else MPI_Comm_rank( comm, &root ); /* else broadcast/send - I'm the root */ MPI_Type_vector( n, m, lda, baseType, &type ); MPI_Type_commit( &type ); MPI_Bcast( A, 1, type, root, comm ); MPI_Type_free( &type ); } /* * Purpose * * Broadcast/send for general double precision arrays. * * Arguments * * ConTxt (input) Ptr to int * Index into MyConTxts00 (my contexts array). * * SCOPE (input) Ptr to char * Limit the scope of the operation. * = 'R' : Operation is performed by a process row. * = 'C' : Operation is performed by a process column. 
* = 'A' : Operation is performed by all processes in grid.
 *
 * TOP (input) Ptr to char
 * Controls fashion in which messages flow within the operation.
 *
 * M (input) Ptr to int
 * The number of rows of the matrix A. M >= 0.
 *
 * N (input) Ptr to int
 * The number of columns of the matrix A. N >= 0.
 *
 * A (input) Ptr to double precision two dimensional array
 * The m by n matrix A. Fortran77 (column-major) storage
 * assumed.
 *
 * LDA (input) Ptr to int
 * The leading dimension of the array A. LDA >= M.
 */
void
Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda) {
  /* Note: despite the "Ptr to int" wording above, the integer arguments of
     this C interface are passed by value. */
  CBLACS_INIT;

  top = top; /* user `top'ology is ignored */

  /* row/column rank of -1 tells CblacsBcast() that the caller is the source */
  CblacsBcast( ConTxt, scope, m, n, A, lda, -1, -1, MPI_DOUBLE );
}

/*
 * Purpose
 *
 * Broadcast/receive for general double precision arrays.
 *
 * Arguments
 *
 * ConTxt (input) Ptr to int
 * Index into MyConTxts00 (my contexts array).
 *
 * SCOPE (input) Ptr to char
 * Limit the scope of the operation.
 * = 'R' : Operation is performed by a process row.
 * = 'C' : Operation is performed by a process column.
 * = 'A' : Operation is performed by all processes in grid.
 *
 * TOP (input) Ptr to char
 * Controls fashion in which messages flow within the operation.
 *
 * M (input) Ptr to int
 * The number of rows of the matrix A. M >= 0.
 *
 * N (input) Ptr to int
 * The number of columns of the matrix A. N >= 0.
 *
 * A (output) Ptr to double precision two dimensional array
 * The m by n matrix A. Fortran77 (column-major) storage
 * assumed.
 *
 * LDA (input) Ptr to int
 * The leading dimension of the array A. LDA >= M.
 *
 *
 * RSRC (input) Ptr to int
 * The process row of the source of the matrix.
 *
 * CSRC (input) Ptr to int
 * The process column of the source of the matrix.
*/
void
Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rsrc, int csrc) {
  /* Note: despite the "Ptr to int" wording above, the integer arguments of
     this C interface are passed by value. */
  CBLACS_INIT;

  top = top; /* user `top'ology is ignored */

  /* (rsrc, csrc) is the grid position of the broadcasting process */
  CblacsBcast( ConTxt, scope, m, n, A, lda, rsrc, csrc, MPI_DOUBLE );
}

/* Broadcast/send for general integer arrays; same arguments as Cdgebs2d()
   with `A' holding int elements. */
void
Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda) {
  CBLACS_INIT;

  top = top; /* user `top'ology is ignored */

  /* row/column rank of -1 tells CblacsBcast() that the caller is the source */
  CblacsBcast( ConTxt, scope, m, n, A, lda, -1, -1, MPI_INT );
}

/* Broadcast/receive for general integer arrays; same arguments as Cdgebr2d()
   with `A' holding int elements. */
void
Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc) {
  CBLACS_INIT;

  top = top; /* user `top'ology is ignored */

  /* (rsrc, csrc) is the grid position of the broadcasting process */
  CblacsBcast( ConTxt, scope, m, n, A, lda, rsrc, csrc, MPI_INT );
}
hpcc-1.4.1/PTRANS/cblacslt.h0000644000000000000000000000363111256503657012305 00000000000000
/* request codes for the `what' argument of Cblacs_get() */
#define SGET_SYSCONTXT    0
#define SGET_BLACSCONTXT 10

/* timers */
extern double dcputime00(void);
extern double dwalltime00(void);

/* Support and communication routines of the stripped-down BLACS.
   NOTE(review): no definition of Cblacs_gridinit or Cdgerv2d appears in the
   part of cblacslt.c visible here -- confirm they are provided elsewhere. */
extern void Cblacs_abort(int ConTxt, int ErrNo);
extern void Cblacs_barrier(int ConTxt, char *scope);
extern void Cblacs_exit(int NotDone);
extern void Cblacs_get(int ConTxt, int what, int *val);
extern void Cblacs_gridexit(int ConTxt);
extern void Cblacs_gridinfo(int ConTxt, int *nprow, int *npcol, int *myrow, int *mycol);
extern void Cblacs_gridinit(int *ConTxt, char *order, int nprow, int npcol);
extern void Cblacs_gridmap(int *ConTxt, int *umap, int ldumap, int nprow, int npcol);
extern void Cblacs_pinfo(int *mypnum, int *nprocs);
extern void Cdgamn2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA,
  int *cA, int ldia, int rdest, int cdest);
extern void Cdgamx2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int *rA,
  int *cA, int ldia, int rdest, int cdest);
extern void Cdgebr2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rsrc,
  int csrc);
extern void Cdgebs2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda);
extern void Cdgerv2d(int ConTxt, int m, int n, double *A, int lda, int rsrc, int csrc);
extern void Cdgesd2d(int ConTxt, int m, int n, double *A, int lda, int rdest, int cdest); extern void Cdgsum2d(int ConTxt, char *scope, char *top, int m, int n, double *A, int lda, int rdest, int cdest); extern void Cigebr2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rsrc, int csrc); extern void Cigebs2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda); extern void Cigsum2d(int ConTxt, char *scope, char *top, int m, int n, int *A, int lda, int rdest, int cdest); extern void Cblacs_dSendrecv(int ctxt, int mSrc, int nSrc, double *Asrc, int ldaSrc, int rdest, int cdest, int mDest, int nDest, double *Adest, int ldaDest, int rsrc, int csrc); hpcc-1.4.1/PTRANS/mem.c0000644000000000000000000001106711256503657011271 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ #include static int CheckNode(int imrow, int imcol, int nmat, int *mval, int *nval, int nbmat, int *mbval, int *nbval, int myrow, int mycol, int nprow, int npcol, long *maxMem) { int i__, ii, m, n, mb, nb, ierr[1]; int lcm, np0, nq0, mp0, mq0, mg, ng, np, nq, mp, mq; long isw, ipw, ipiw, ipa, ipc; *maxMem = 0; for (i__ = 0; i__ < nmat; ++i__) { m = mval[i__]; n = nval[i__]; /* Make sure matrix information is correct */ ierr[0] = 0; if (m < 1) { ierr[0] = 1; } else if (n < 1) { ierr[0] = 1; } if (ierr[0] > 0) { continue; } for (ii = 0; ii < nbmat; ++ii) { /* Loop over different block sizes */ mb = mbval[ii]; nb = nbval[ii]; /* Make sure blocking sizes are legal */ ierr[0] = 0; if (mb < 1) { ierr[0] = 1; } else if (nb < 1) { ierr[0] = 1; } /* Make sure no one had error */ if (ierr[0] > 0) { continue; } mp = numroc_(&m, &mb, &myrow, &imrow, &nprow); mq = numroc_(&m, &mb, &mycol, &imcol, &npcol); np = numroc_(&n, &nb, &myrow, &imrow, &nprow); nq = numroc_(&n, &nb, &mycol, &imcol, &npcol); mg = iceil_(&m, &mb); ng = iceil_(&n, &nb); mp0 = iceil_(&mg, &nprow) * mb; mq0 = iceil_(&mg, &npcol) * mb; np0 = 
iceil_(&ng, &nprow) * nb; nq0 = iceil_(&ng, &npcol) * nb; lcm = ilcm_(&nprow, &npcol); ipc = 1; ipa = ipc + (long)np0 * (long)mq0; ipiw = (long)mp0 * (long)nq0 + ipa; ipw = ipiw; isw = ipw + (long)(iceil_(&mg, &lcm) << 1) * (long)mb * (long)iceil_(&ng, &lcm) * (long)nb; if (*maxMem < isw) *maxMem = isw; } } return 0; } int MaxMem(int nprocs, int imrow, int imcol, int nmat, int *mval, int *nval, int nbmat, int *mbval, int *nbval, int ngrids, int *npval, int *nqval, long *maxMem) { int nprow, npcol, myrow, mycol; int j, ierr[1]; long curMem; *maxMem = 0; for (j = 0; j < ngrids; ++j) { nprow = npval[j]; npcol = nqval[j]; /* Make sure grid information is correct */ ierr[0] = 0; if (nprow < 1) { ierr[0] = 1; } else if (npcol < 1) { ierr[0] = 1; } else if (nprow * npcol > nprocs) { ierr[0] = 1; } if (ierr[0] > 0) { continue; } for (myrow = 0; myrow < nprow; myrow++) for (mycol = 0; mycol < npcol; mycol++) { CheckNode( imrow, imcol, nmat, mval, nval, nbmat, mbval, nbval, myrow, mycol, nprow, npcol, &curMem ); if (*maxMem < curMem) *maxMem = curMem; } } return 0; } #ifdef HPCC_MEMMAIN #include int iceil_(int *n,int *d) {return *n>0 ? 
(*n+*d-1)/ *d : *n/ *d;} int numroc_(int *n, int *nb, int *iproc, int *isrcproc, int *nprocs) { int ret_val, extrablks, mydist, nblocks; mydist = (*nprocs + *iproc - *isrcproc) % *nprocs; nblocks = *n / *nb; ret_val = nblocks / *nprocs * *nb; extrablks = nblocks % *nprocs; if (mydist < extrablks) { ret_val += *nb; } else if (mydist == extrablks) { ret_val += *n % *nb; } return ret_val; } int ilcm_(int *m, int *n) { int ret_val; int ia, iq, ir; if (*m >= *n) { ia = *m; ret_val = *n; } else { ia = *n; ret_val = *m; } for (;;) { iq = ia / ret_val; ir = ia - iq * ret_val; if (ir == 0) { ret_val = *m * *n / ret_val; return ret_val; } ia = ret_val; ret_val = ir; } } int main(int argc, char *argv[]) { int n, nb, nprow, npcol, ng, lcm; int nval[1], nbval[1]; long maxMem; if (argc <= 1) { printf( "Usage:\n%s n nb nprow npcol\n", argv[0] ); } if (argc <= 1 || sscanf( argv[1], "%d", &n ) != 1 || n < 1) n = 50000; if (argc <= 2 || sscanf( argv[2], "%d", &nb ) != 1 || nb < 1) nb = 80; if (argc <= 3 || sscanf( argv[3], "%d", &nprow ) != 1 || nprow < 1) nprow = 8; if (argc <= 4 || sscanf( argv[4], "%d", &npcol ) != 1 || npcol < 1) npcol = nprow; nval[0] = n; nbval[0] = nb; CheckNode( 0, 0, 1, nval, nval, 1, nbval, nbval, 0, 0, nprow, npcol, &maxMem ); printf( "n=%d nb=%d nprow=%d npcol=%d lcm(nprow,npcol)=%d\n%ld\n", n, nb, nprow, npcol, ilcm_(&nprow, &npcol), maxMem ); ng = iceil_(&n, &nb); lcm = ilcm_(&nprow, &npcol); printf( "%d %d %d\n", ng, lcm, (iceil_(&ng, &lcm) << 1) * nb * iceil_(&ng, &lcm) * nb ); printf( "%d %d\n", (iceil_(&ng, &lcm) << 1), iceil_(&ng, &lcm) ); return 0; } #endif hpcc-1.4.1/PTRANS/pdmatcmp.c0000644000000000000000000000423411256503657012316 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* pdmatcmp.c */ #include #include "cblacslt.h" /* Purpose PDMATCMP : Parallel Real Double precision MATrix CoMPare. Finds ERROR = MAX(||A(i,j) - ACOPY(i,j)||) / MAX(|A(i,j)|) locally. Then, returns the global maximum from the local values. 
Arguments M (input) INTEGER Number of rows of the local matrices A and B. M >= 0. N (input) INTEGER Number of columns of the local matrices A and B. N >= 0. A (input/output) DOUBLE PRECISION, dimension ( LDA, N ) The pointer to the local matrix A. LDA (input) INTEGER Leading Dimension of A. LDA >= M ACOPY (output) DOUBLE PRECISION, dimension ( LDB, N ) The pointer to the local matrix ACOPY. LDB (input) INTEGER Leading Dimension of B. LDB >= M. ERROR (output) DOUBLE PRECISION ERROR = MAX(|A(i,j) - ACOPY(i,j)|) / MAX(|A(i,j)|) (unscaled residual) */ int pdmatcmp(int *ictxt, int *m_, int *n_, double *a, int *lda_, double *aCopy, int *ldc_, double *error) { int ctxt = *ictxt, m = *m_, n = *n_; long lda = *lda_, ldc = *ldc_; int j, info, ix; double v, aMax, vals[2]; info = 0; aMax = *error = 0.0; if (m < 0) info = 1; else if (n < 0) info = 2; else if (lda < m) info = 4; else if (ldc < m) info = 6; if (info != 0) { pxerbla( ictxt, "pdmatcmp", &info ); return 0; } for (j = 0; j < n; j++) { /* the largest absolute value in column */ ix = HPL_idamax( m, a + j * lda, 1 ); v = fabs( a[ix + j * lda] ); if (aMax < v) aMax = v; /* difference of two columns */ HPL_daxpy( m, -1.0, a + j * lda, 1, aCopy + j * ldc, 1 ); /* the largest absolute value in column */ ix = HPL_idamax( m, aCopy + j * ldc, 1 ); v = fabs( aCopy[ix + j * ldc] ); if (*error < v) *error = v; } /* calculate max of error and max-of-a over all processes */ vals[0] = *error; vals[1] = aMax; Cdgamx2d( ctxt, "All", " ", 2,1, vals, 2, &j, &j, -1, -1, -1 ); *error = vals[0]; aMax = vals[1]; *error /= aMax; /* calculate unscaled residual */ return 0; } hpcc-1.4.1/PTRANS/pdmatgen.c0000644000000000000000000003714511256503657012317 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ #include int pdmatgen(int *ictxt, char *aform, char *diag, int *m, int *n, int *mb, int *nb, double *aMat, int *lda, int *iarow, int *iacol, int *iseed, int *iroff, int *irnum, int *icoff, int *icnum, int * myrow, int 
*mycol, int *nprow, int *npcol, double alpha) { /* Every generated entry is computed as alpha * (old entry) + (1 - 2 * pdrand()); which entries are visited, and in which order, depends on *aform. Always returns 0. */ /* System generated locals */ long a_dim1, a_offset; int i__1, i__2, i__3, i__4; double d__1, d_tmp; /* Local variables (all declared static: their values persist between calls, so the routine is not reentrant) */ static int i__, j, ic, ik, jk, ir, mp, nq, ia1[2], ia2[2], ia3[2], ia4[2], ia5[2], ib1[2], ib2[2], ib3[2], ic1[2], ic2[2], ic3[2], ic4[2], ic5[2], iadd[2], mend, nend, moff, noff; static int herm; static int info, npmb, nqnb; static int tran; static int mult[2]; static int symm; static int iran1[2], iran2[2], iran3[2], iran4[2], itmp1[2], itmp2[2], jump1, jump2, jump3, jump4, jump5, jump6, jump7, itmp3[2]; static int ioffc, jseed[2]; static int ioffr, mrcol, maxmn, mrrow; /* -- ScaLAPACK routine (version 1.0) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* February 28, 1995 */ /* Purpose */ /* PDMATGEN : Parallel Real Double precision MATrix GENerator. */ /* Generate (or regenerate) a distributed matrix A (or sub-matrix of A). */ /* Arguments */ /* ICTXT (global input) INTEGER */ /* The BLACS context handle, indicating the global context of */ /* the operation. The context itself is global. */ /* AFORM (global input) CHARACTER*1 */ /* if AFORM = 'S' : A is returned as a symmetric matrix. */ /* if AFORM = 'H' : A is returned as a Hermitian matrix. */ /* if AFORM = 'T' : A is overwritten with the transpose of */ /* what would normally be generated. */ /* if AFORM = 'C' : A is overwritten with the conjugate trans- */ /* pose of what would normally be generated. */ /* if AFORM = 'N' : a random matrix is generated. */ /* Any other value is rejected (info = 2). */ /* DIAG (global input) CHARACTER*1 */ /* if DIAG = 'D' : A is diagonally dominant. */ /* Must be 'D' or 'N'; anything else is rejected (info = 3). */ /* M (global input) INTEGER */ /* The number of rows in the generated distributed matrix. */ /* N (global input) INTEGER */ /* The number of columns in the generated distributed */ /* matrix. */ /* MB (global input) INTEGER */ /* The row blocking factor of the distributed matrix A. 
*/ /* NB (global input) INTEGER */ /* The column blocking factor of the distributed matrix A. */ /* A (local input/output) DOUBLE PRECISION, pointer into the local */ /* memory to an array of dimension ( LDA, * ) containing the */ /* local pieces of the distributed matrix. */ /* LDA (local input) INTEGER */ /* The leading dimension of the array containing the local */ /* pieces of the distributed matrix A. */ /* IAROW (global input) INTEGER */ /* The row processor coordinate which holds the first block */ /* of the distributed matrix A. */ /* IACOL (global input) INTEGER */ /* The column processor coordinate which holds the first */ /* block of the distributed matrix A. */ /* ISEED (global input) INTEGER */ /* The seed number to generate the distributed matrix A. */ /* IROFF (local input) INTEGER */ /* The number of local rows of A that have already been */ /* generated. It should be a multiple of MB. */ /* IRNUM (local input) INTEGER */ /* The number of local rows to be generated. */ /* ICOFF (local input) INTEGER */ /* The number of local columns of A that have already been */ /* generated. It should be a multiple of NB. */ /* ICNUM (local input) INTEGER */ /* The number of local columns to be generated. */ /* MYROW (local input) INTEGER */ /* The row process coordinate of the calling process. */ /* MYCOL (local input) INTEGER */ /* The column process coordinate of the calling process. */ /* NPROW (global input) INTEGER */ /* The number of process rows in the grid. */ /* NPCOL (global input) INTEGER */ /* The number of process columns in the grid. */ /* ALPHA (global input) DOUBLE PRECISION, multiplication factor for old content of `A': A(I,J) <- ALPHA * A(I,J) + RANDOM(I,J) If ALPHA is zero then A is overwritten with random numbers. For non-zero ALPHAs, A gets updated. NOTE: the code always evaluates ALPHA * A(I,J), so the old content of A is read even when ALPHA is zero. */ /* Notes */ /* The code is originally developed by David Walker, ORNL, */ /* and modified by Jaeyoung Choi, ORNL. */ /* Reference: G. Fox et al. 
*/ /* Section 12.3 of "Solving problems on concurrent processors Vol. I" */ /* Test the input arguments */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; aMat -= a_offset; /* from here on aMat[i + j * a_dim1] addresses local entry (i, j) with 1-based i and j */ /* Function Body */ /* NOTE(review): numroc_ is called with mb, nb, nprow and npcol before any of them has been range-checked below */ mp = numroc_(m, mb, myrow, iarow, nprow); nq = numroc_(n, nb, mycol, iacol, npcol); symm = (*aform == 'S' ? 1 : 0); herm = (*aform == 'H' ? 1 : 0); tran = (*aform == 'T' ? 1 : 0); info = 0; /* info stays 0 when every test passes; otherwise it is the code handed to pxerbla. The tests form one else-if chain, so for aform 'S'/'H' only the m == n and mb == nb tests run after the aform/diag tests. */ if (! (symm || herm || tran) && *aform != 'C' && *aform != 'N') { info = 2; } else if (*diag != 'D' && *diag != 'N') { info = 3; } else if (symm || herm) { if (*m != *n) { info = 5; } else if (*mb != *nb) { info = 7; } } else if (*m < 0) { info = 4; } else if (*n < 0) { info = 5; } else if (*mb < 1) { info = 6; } else if (*nb < 1) { info = 7; } else if (*lda < 0) { info = 9; } else if (*iarow < 0 || *iarow >= *nprow) { info = 10; } else if (*iacol < 0 || *iacol >= *npcol) { info = 11; } else if (*iroff % *mb > 0) { info = 13; } else if (*irnum > mp - *iroff) { info = 14; } else if (*icoff % *nb > 0) { info = 15; } else if (*icnum > nq - *icoff) { info = 16; } else if (*myrow < 0 || *myrow >= *nprow) { info = 17; } else if (*mycol < 0 || *mycol >= *npcol) { info = 18; } else if (*diag == 'D' && alpha != 0.0) { info = 19; /* diagonal scaling is not implemented with matrix update (rather than overwrite) */ } if (info != 0) { pxerbla( ictxt, "PDMATGEN", &info ); return 0; } /* process coordinates relative to the process (iarow, iacol) */ mrrow = (*nprow + *myrow - *iarow) % *nprow; mrcol = (*npcol + *mycol - *iacol) % *npcol; npmb = *nprow * *mb; nqnb = *npcol * *nb; /* the loops below visit local block rows moff+1..mend and local block columns noff+1..nend */ moff = *iroff / *mb; noff = *icoff / *nb; mend = iceil_(irnum, mb) + moff; nend = iceil_(icnum, nb) + noff; /* presumably the 16-bit halves of the multiplier 1103515245 (= 16838 * 65536 + 20077) and of the increment 12345 -- confirm against xjumpm_/jumpit_ */ mult[0] = 20077; mult[1] = 16838; iadd[0] = 12345; iadd[1] = 0; jseed[0] = *iseed; jseed[1] = 0; /* Symmetric or Hermitian matrix will be generated. 
*/ if (symm || herm) { /* First, generate the lower triangular part (with diagonal block) */ jump1 = 1; jump2 = npmb; jump3 = *m; jump4 = nqnb; jump5 = *nb; jump6 = mrcol; jump7 = *mb * mrrow; xjumpm_(&jump1, mult, iadd, jseed, iran1, ia1, ic1); xjumpm_(&jump2, mult, iadd, iran1, itmp1, ia2, ic2); xjumpm_(&jump3, mult, iadd, iran1, itmp1, ia3, ic3); xjumpm_(&jump4, ia3, ic3, iran1, itmp1, ia4, ic4); xjumpm_(&jump5, ia3, ic3, iran1, itmp1, ia5, ic5); xjumpm_(&jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2); xjumpm_(&jump7, mult, iadd, itmp3, iran1, itmp1, itmp2); xjumpm_(&noff, ia4, ic4, iran1, itmp1, itmp2, itmp3); xjumpm_(&moff, ia2, ic2, itmp1, iran1, itmp2, itmp3); setran_(iran1, ia1, ic1); for (i__ = 1; i__ <= 2; ++i__) { ib1[i__ - 1] = iran1[i__ - 1]; ib2[i__ - 1] = iran1[i__ - 1]; ib3[i__ - 1] = iran1[i__ - 1]; /* L10: */ } /* jk and ik are the 1-based local column and row being filled */ jk = 1; i__1 = nend; for (ic = noff + 1; ic <= i__1; ++ic) { /* ioffc / ioffr: offset of the current block column / block row; they are compared to place the block relative to the diagonal */ ioffc = ((ic - 1) * *npcol + mrcol) * *nb; i__2 = *nb; for (i__ = 1; i__ <= i__2; ++i__) { if (jk > *icnum) { /* all icnum local columns are done */ goto L90; } ik = 1; i__3 = mend; for (ir = moff + 1; ir <= i__3; ++ir) { ioffr = ((ir - 1) * *nprow + mrrow) * *mb; if (ioffr > ioffc) { /* block below the diagonal block: fill mb rows of this column */ i__4 = *mb; for (j = 1; j <= i__4; ++j) { if (ik > *irnum) { goto L60; } aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. - pdrand() * 2.; ++ik; /* L20: */ } } else if (ioffc == ioffr) { /* diagonal block: start at the diagonal entry of column i__ */ ik = ik + i__ - 1; if (ik > *irnum) { goto L60; } /* NOTE(review): this loop assigns the same entry i__ - 1 times, drawing one pdrand() value each time */ i__4 = i__ - 1; for (j = 1; j <= i__4; ++j) { aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. - pdrand() * 2.; /* L30: */ } aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. - pdrand() * 2.; i__4 = *mb - i__; for (j = 1; j <= i__4; ++j) { if (ik + j > *irnum) { goto L60; } d_tmp = 1. 
- pdrand() * 2.; /* the same value goes to entry (ik + j, jk) and to its mirror (ik, jk + j); NOTE(review): only the row index is checked against irnum, the column jk + j is not checked against icnum */ aMat[ik + j + jk * a_dim1] = alpha * aMat[ik + j + jk * a_dim1] + d_tmp; aMat[ik + (jk + j) * a_dim1] = alpha * aMat[ik + (jk + j) * a_dim1] + d_tmp; /* L40: */ } ik = ik + *mb - i__ + 1; } else { /* block above the diagonal: skipped here, filled by the second pass */ ik += *mb; } jumpit_(ia2, ic2, ib1, iran2); ib1[0] = iran2[0]; ib1[1] = iran2[1]; /* L50: */ } L60: ++jk; jumpit_(ia3, ic3, ib2, iran3); ib1[0] = iran3[0]; ib1[1] = iran3[1]; ib2[0] = iran3[0]; ib2[1] = iran3[1]; /* L70: */ } jumpit_(ia4, ic4, ib3, iran4); ib1[0] = iran4[0]; ib1[1] = iran4[1]; ib2[0] = iran4[0]; ib2[1] = iran4[1]; ib3[0] = iran4[0]; ib3[1] = iran4[1]; /* L80: */ } /* Next, generate the upper triangular part. */ /* the generator is set up again from *iseed, this time with the row and column jump parameters exchanged */ L90: mult[0] = 20077; mult[1] = 16838; iadd[0] = 12345; iadd[1] = 0; jseed[0] = *iseed; jseed[1] = 0; jump1 = 1; jump2 = nqnb; jump3 = *n; jump4 = npmb; jump5 = *mb; jump6 = mrrow; jump7 = *nb * mrcol; xjumpm_(&jump1, mult, iadd, jseed, iran1, ia1, ic1); xjumpm_(&jump2, mult, iadd, iran1, itmp1, ia2, ic2); xjumpm_(&jump3, mult, iadd, iran1, itmp1, ia3, ic3); xjumpm_(&jump4, ia3, ic3, iran1, itmp1, ia4, ic4); xjumpm_(&jump5, ia3, ic3, iran1, itmp1, ia5, ic5); xjumpm_(&jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2); xjumpm_(&jump7, mult, iadd, itmp3, iran1, itmp1, itmp2); xjumpm_(&moff, ia4, ic4, iran1, itmp1, itmp2, itmp3); xjumpm_(&noff, ia2, ic2, itmp1, iran1, itmp2, itmp3); setran_(iran1, ia1, ic1); for (i__ = 1; i__ <= 2; ++i__) { ib1[i__ - 1] = iran1[i__ - 1]; ib2[i__ - 1] = iran1[i__ - 1]; ib3[i__ - 1] = iran1[i__ - 1]; /* L100: */ } ik = 1; i__1 = mend; for (ir = moff + 1; ir <= i__1; ++ir) { ioffr = ((ir - 1) * *nprow + mrrow) * *mb; i__2 = *mb; for (j = 1; j <= i__2; ++j) { if (ik > *irnum) { goto L160; } jk = 1; i__3 = nend; for (ic = noff + 1; ic <= i__3; ++ic) { ioffc = ((ic - 1) * *npcol + mrcol) * *nb; if (ioffc > ioffr) { /* block above the diagonal block: fill nb entries of this row */ i__4 = *nb; for (i__ = 1; i__ <= i__4; ++i__) { if (jk > *icnum) { goto L130; } aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. 
- pdrand() * 2.; ++jk; /* L110: */ } } else { /* diagonal and lower blocks were filled by the first pass */ jk += *nb; } jumpit_(ia2, ic2, ib1, iran2); ib1[0] = iran2[0]; ib1[1] = iran2[1]; /* L120: */ } L130: ++ik; jumpit_(ia3, ic3, ib2, iran3); ib1[0] = iran3[0]; ib1[1] = iran3[1]; ib2[0] = iran3[0]; ib2[1] = iran3[1]; /* L140: */ } jumpit_(ia4, ic4, ib3, iran4); ib1[0] = iran4[0]; ib1[1] = iran4[1]; ib2[0] = iran4[0]; ib2[1] = iran4[1]; ib3[0] = iran4[0]; ib3[1] = iran4[1]; /* L150: */ } L160: /* end of the symmetric/Hermitian case; the next branch generates the (conjugate) transposed matrix */ ; } else if (tran || *aform == 'C') { /* same setup as the general case further below with the row and column jump parameters exchanged; the outer loops run over local rows */ jump1 = 1; jump2 = nqnb; jump3 = *n; jump4 = npmb; jump5 = *mb; jump6 = mrrow; jump7 = *nb * mrcol; xjumpm_(&jump1, mult, iadd, jseed, iran1, ia1, ic1); xjumpm_(&jump2, mult, iadd, iran1, itmp1, ia2, ic2); xjumpm_(&jump3, mult, iadd, iran1, itmp1, ia3, ic3); xjumpm_(&jump4, ia3, ic3, iran1, itmp1, ia4, ic4); xjumpm_(&jump5, ia3, ic3, iran1, itmp1, ia5, ic5); xjumpm_(&jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2); xjumpm_(&jump7, mult, iadd, itmp3, iran1, itmp1, itmp2); xjumpm_(&moff, ia4, ic4, iran1, itmp1, itmp2, itmp3); xjumpm_(&noff, ia2, ic2, itmp1, iran1, itmp2, itmp3); setran_(iran1, ia1, ic1); for (i__ = 1; i__ <= 2; ++i__) { ib1[i__ - 1] = iran1[i__ - 1]; ib2[i__ - 1] = iran1[i__ - 1]; ib3[i__ - 1] = iran1[i__ - 1]; /* L170: */ } ik = 1; i__1 = mend; for (ir = moff + 1; ir <= i__1; ++ir) { ioffr = ((ir - 1) * *nprow + mrrow) * *mb; i__2 = *mb; for (j = 1; j <= i__2; ++j) { if (ik > *irnum) { goto L230; } jk = 1; i__3 = nend; for (ic = noff + 1; ic <= i__3; ++ic) { ioffc = ((ic - 1) * *npcol + mrcol) * *nb; i__4 = *nb; for (i__ = 1; i__ <= i__4; ++i__) { if (jk > *icnum) { goto L200; } aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. 
- pdrand() * 2.; ++jk; /* L180: */ } jumpit_(ia2, ic2, ib1, iran2); ib1[0] = iran2[0]; ib1[1] = iran2[1]; /* L190: */ } L200: ++ik; jumpit_(ia3, ic3, ib2, iran3); ib1[0] = iran3[0]; ib1[1] = iran3[1]; ib2[0] = iran3[0]; ib2[1] = iran3[1]; /* L210: */ } jumpit_(ia4, ic4, ib3, iran4); ib1[0] = iran4[0]; ib1[1] = iran4[1]; ib2[0] = iran4[0]; ib2[1] = iran4[1]; ib3[0] = iran4[0]; ib3[1] = iran4[1]; /* L220: */ } L230: /* end of the transposed case; the next branch generates the general random matrix */ ; } else { /* general case: the outer loops run over local columns, the inner loops over the rows of each column */ jump1 = 1; jump2 = npmb; jump3 = *m; jump4 = nqnb; jump5 = *nb; jump6 = mrcol; jump7 = *mb * mrrow; xjumpm_(&jump1, mult, iadd, jseed, iran1, ia1, ic1); xjumpm_(&jump2, mult, iadd, iran1, itmp1, ia2, ic2); xjumpm_(&jump3, mult, iadd, iran1, itmp1, ia3, ic3); xjumpm_(&jump4, ia3, ic3, iran1, itmp1, ia4, ic4); xjumpm_(&jump5, ia3, ic3, iran1, itmp1, ia5, ic5); xjumpm_(&jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2); xjumpm_(&jump7, mult, iadd, itmp3, iran1, itmp1, itmp2); xjumpm_(&noff, ia4, ic4, iran1, itmp1, itmp2, itmp3); xjumpm_(&moff, ia2, ic2, itmp1, iran1, itmp2, itmp3); setran_(iran1, ia1, ic1); for (i__ = 1; i__ <= 2; ++i__) { ib1[i__ - 1] = iran1[i__ - 1]; ib2[i__ - 1] = iran1[i__ - 1]; ib3[i__ - 1] = iran1[i__ - 1]; /* L240: */ } jk = 1; i__1 = nend; for (ic = noff + 1; ic <= i__1; ++ic) { ioffc = ((ic - 1) * *npcol + mrcol) * *nb; i__2 = *nb; for (i__ = 1; i__ <= i__2; ++i__) { if (jk > *icnum) { goto L300; } ik = 1; i__3 = mend; for (ir = moff + 1; ir <= i__3; ++ir) { ioffr = ((ir - 1) * *nprow + mrrow) * *mb; i__4 = *mb; for (j = 1; j <= i__4; ++j) { if (ik > *irnum) { goto L270; } aMat[ik + jk * a_dim1] = alpha * aMat[ik + jk * a_dim1] + 1. 
- pdrand() * 2.; ++ik; /* L250: */ } jumpit_(ia2, ic2, ib1, iran2); ib1[0] = iran2[0]; ib1[1] = iran2[1]; /* L260: */ } L270: ++jk; jumpit_(ia3, ic3, ib2, iran3); ib1[0] = iran3[0]; ib1[1] = iran3[1]; ib2[0] = iran3[0]; ib2[1] = iran3[1]; /* L280: */ } jumpit_(ia4, ic4, ib3, iran4); ib1[0] = iran4[0]; ib1[1] = iran4[1]; ib2[0] = iran4[0]; ib2[1] = iran4[1]; ib3[0] = iran4[0]; ib3[1] = iran4[1]; /* L290: */ } L300: ; } /* Diagonally dominant matrix will be generated. */ if (*diag == 'D') { if (*mb != *nb) { /* NOTE: the message has no trailing newline and the function still returns 0 */ printf( "Diagonally dominant matrices with rowNB not equal colNB is not supported!" ); return 0; } maxmn = Mmax(*m,*n); jk = 1; i__1 = nend; for (ic = noff + 1; ic <= i__1; ++ic) { ioffc = ((ic - 1) * *npcol + mrcol) * *nb; ik = 1; i__2 = mend; for (ir = moff + 1; ir <= i__2; ++ir) { ioffr = ((ir - 1) * *nprow + mrrow) * *mb; if (ioffc == ioffr) { /* diagonal block: row ik and column jk + j advance together, so each diagonal entry becomes |entry| + max(m, n) */ i__3 = *mb - 1; for (j = 0; j <= i__3; ++j) { if (ik > *irnum) { goto L330; } aMat[ik + (jk + j) * a_dim1] = (d__1 = aMat[ik + (jk + j) * a_dim1], fabs(d__1)) + maxmn; ++ik; /* L310: */ } } else { ik += *mb; } /* L320: */ } L330: jk += *nb; /* L340: */ } } return 0; } /* pdmatgen */ hpcc-1.4.1/PTRANS/pdtrans.c0000644000000000000000000005364611256503657012177 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ #include #include "cblacslt.h" /* Common Block Declarations */ struct { int iaz, jaz, itz, jtz; } commtrb_; #define commtrb_1 commtrb_ extern struct { int ictxt; } context_; #define context_1 context_ /* Table of constant values */ static int c__0 = 0; static int dtr2mx_(double *a, int *lda, double *beta, double *t, int *ldt, int *nrow, int *ncol, int * mb, int *nb, int *ilt, int *jlt) { /* System generated locals */ long a_dim1, a_offset, t_dim1, t_offset; int i__1, i__2, i__3, i__4; /* Local variables */ static int k, ia, ja, jj, ki, kj, it, jt, mr, irm, jrm; /* -- PUMMA Package routine (version 2.1) -- */ /* Jaeyoung Choi, Oak Ridge National Laboratory. */ /* Jack Dongarra, Univ. 
of Tennessee, Oak Ridge National Laboratory. */ /* David Walker, Oak Ridge National Laboratory. */ /* October 31, 1994. */ /* Purpose */ /* T <== A' + beta*T (assume beta = 0.0, or 1.0) */ /* T is a scattered 2-D array from a scattered 2-D array A */ /* T = A' */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; /* a and t are now indexed with 1-based (row, column) as x[row + column * x_dim1] */ /* Function Body */ /* ia/ja are the row/column offsets of the current block in a, it/jt those in t; they advance by the strides kept in commtrb_ (iaz, jaz, itz, jtz) */ ia = 0; jt = 0; /* beta is only compared with zero: any non-zero value takes the accumulating (T += A') branch */ if (*beta == 0.) { /* the first nrow-1 block rows and ncol-1 block columns are copied as full mb x nb blocks; the last block row/column is handled separately with the remaining extent */ i__1 = *nrow - 2; for (ki = 0; ki <= i__1; ++ki) { ja = 0; it = 0; i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__3 = *nb; for (jj = 1; jj <= i__3; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { /* transposed copy: entry (k, jj) of the block in a lands at (jj, k) of the block in t */ t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L10: */ } } ja += commtrb_1.jaz; it += commtrb_1.itz; /* L20: */ } /* last block column: min(nb, jrm) columns are left */ jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L30: */ } } } ia += commtrb_1.iaz; jt += commtrb_1.jtz; /* L40: */ } /* last block row: mr = min(irm, mb) rows are left */ irm = *ilt - ia; if (irm > 0) { ja = 0; it = 0; mr = Mmin(irm,*mb); i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__4 = *nb; for (jj = 1; jj <= i__4; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L50: */ } } ja += commtrb_1.jaz; it += commtrb_1.itz; /* L60: */ } jrm = *jlt - ja; if (jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L70: */ } } } } } else { /* T = A' + T */ /* same traversal as above with += instead of = */ i__2 = *nrow - 2; for (ki = 0; ki <= i__2; ++ki) { ja = 0; it = 0; i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__4 = *nb; for (jj = 1; jj <= i__4; ++jj) { i__3 = *mb; for (k = 1; k <= i__3; ++k) { t[it + jj + (jt + k) * t_dim1] += a[ia + k + (ja + jj) * a_dim1]; /* L80: */ } } ja += commtrb_1.jaz; it += commtrb_1.itz; /* L90: */ } jrm = *jlt - ja; if 
(jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__3 = *mb; for (k = 1; k <= i__3; ++k) { t[it + jj + (jt + k) * t_dim1] += a[ia + k + (ja + jj) * a_dim1]; /* L100: */ } } } ia += commtrb_1.iaz; jt += commtrb_1.jtz; /* L110: */ } /* last block row of the accumulating branch */ irm = *ilt - ia; if (irm > 0) { ja = 0; it = 0; mr = Mmin(irm,*mb); i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__3 = *nb; for (jj = 1; jj <= i__3; ++jj) { i__1 = mr; for (k = 1; k <= i__1; ++k) { t[it + jj + (jt + k) * t_dim1] += a[ia + k + (ja + jj) * a_dim1]; /* L120: */ } } ja += commtrb_1.jaz; it += commtrb_1.itz; /* L130: */ } jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__1 = mr; for (k = 1; k <= i__1; ++k) { t[it + jj + (jt + k) * t_dim1] += a[ia + k + (ja + jj) * a_dim1]; /* L140: */ } } } } } /* the return value is always 0 */ return 0; } /* dtr2mx_ */ static int dtr2bf_(double *a, int *lda, double *t, int *ldt, int *nrow, int *ncol, int *mb, int *nb, int *ilt, int *jlt) { /* System generated locals */ long a_dim1, a_offset, t_dim1, t_offset; int i__1, i__2, i__3, i__4; /* Local variables */ static int k, ia, ja, jj, ki, kj, it, jt, mr, irm, jrm; /* -- PUMMA Package routine (version 2.1) -- */ /* Jaeyoung Choi, Oak Ridge National Laboratory. */ /* Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory. */ /* David Walker, Oak Ridge National Laboratory. */ /* October 31, 1994. 
*/ /* Purpose */ /* T <== A' */ /* T is a condensed 2-D buffer from a scattered 2-D array A */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; /* a and t are now indexed with 1-based (row, column) as x[row + column * x_dim1] */ /* Function Body */ /* offsets into a advance by the commtrb_ strides (iaz, jaz); offsets into t advance by exactly one block (nb rows, mb columns), so the blocks are stored back to back in t */ ia = 0; jt = 0; /* the first nrow-1 block rows and ncol-1 block columns are full mb x nb blocks; the last block row/column uses the remaining extent */ i__1 = *nrow - 2; for (ki = 0; ki <= i__1; ++ki) { ja = 0; it = 0; i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__3 = *nb; for (jj = 1; jj <= i__3; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { /* transposed copy: entry (k, jj) of the block in a lands at (jj, k) of the block in t */ t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L10: */ } } ja += commtrb_1.jaz; it += *nb; /* L20: */ } /* last block column: min(nb, jrm) columns are left */ jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L30: */ } } } ia += commtrb_1.iaz; jt += *mb; /* L40: */ } /* last block row: mr = min(mb, irm) rows are left */ irm = *ilt - ia; if (irm > 0) { ja = 0; it = 0; mr = Mmin(*mb,irm); i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__4 = *nb; for (jj = 1; jj <= i__4; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L50: */ } } ja += commtrb_1.jaz; it += *nb; /* L60: */ } jrm = *jlt - ja; if (jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { t[it + jj + (jt + k) * t_dim1] = a[ia + k + (ja + jj) * a_dim1]; /* L70: */ } } } } /* the return value is always 0 */ return 0; } /* dtr2bf_ */ static int dmv2mx_(double *t, int *ldt, double *beta, double *a, int *lda, int *nrow, int *ncol, int *mb, int *nb, int *ilt, int *jlt) { /* System generated locals */ long t_dim1, t_offset, a_dim1, a_offset; int i__1, i__2, i__3, i__4; /* Local variables */ static int k, ia, ja, jj, ki, kj, it, jt, mr, irm, jrm; /* -- PUMMA Package routine (version 2.1) -- */ /* Jaeyoung Choi, Oak Ridge National Laboratory. */ /* Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory. */ /* David Walker, Oak Ridge National Laboratory. */ /* October 31, 1994. 
*/ /* Purpose */ /* A <== T + beta*A (assume beta = 0.0, or 1.0) */ /* A is a scattered 2-D array from a condensed 2-D buffer T */ /* Parameter adjustments */ t_dim1 = *ldt; t_offset = 1 + t_dim1; t -= t_offset; a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; /* t and a are now indexed with 1-based (row, column) as x[row + column * x_dim1] */ /* Function Body */ /* offsets into t advance by exactly one block (mb rows, nb columns); offsets into a advance by the commtrb_ strides itz (rows) and jtz (columns). No transposition happens here: entry (k, jj) is moved to (k, jj). */ it = 0; ia = 0; /* A <== T */ /* beta is only compared with zero: any non-zero value takes the accumulating (A += T) branch */ if (*beta == 0.) { /* If NPROW = 1, use DCOPY */ /* with a single block row each column is one contiguous run of min(mb, ilt) entries */ if (*nrow == 1) { jt = 0; ja = 0; i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__2 = *nb; for (jj = 1; jj <= i__2; ++jj) { i__3 = Mmin(*mb,*ilt); HPL_dcopy(i__3, &t[(jt + jj) * t_dim1 + 1], 1, &a[(ja + jj) * a_dim1 + 1], 1); /* L10: */ } jt += *nb; ja += commtrb_1.jtz; /* L20: */ } /* last block column: min(nb, jrm) columns are left */ jrm = *jlt - ja; if (jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__2 = Mmin(*mb,*ilt); HPL_dcopy(i__2, &t[(jt + jj) * t_dim1 + 1], 1, &a[(ja + jj) * a_dim1 + 1], 1); /* L30: */ } } } else { /* general case: the first nrow-1 block rows and ncol-1 block columns are full blocks */ i__1 = *nrow - 2; for (ki = 0; ki <= i__1; ++ki) { jt = 0; ja = 0; i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__3 = *nb; for (jj = 1; jj <= i__3; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { a[ia + k + (ja + jj) * a_dim1] = t[it + k + (jt + jj) * t_dim1]; /* L40: */ } } jt += *nb; ja += commtrb_1.jtz; /* L50: */ } jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__4 = *mb; for (k = 1; k <= i__4; ++k) { a[ia + k + (ja + jj) * a_dim1] = t[it + k + (jt + jj) * t_dim1]; /* L60: */ } } } it += *mb; ia += commtrb_1.itz; /* L70: */ } /* last block row: mr = min(mb, irm) rows are left */ irm = *ilt - ia; if (irm > 0) { jt = 0; ja = 0; mr = Mmin(*mb,irm); i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__4 = *nb; for (jj = 1; jj <= i__4; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { a[ia + k + (ja + jj) * a_dim1] = t[it + k + (jt + jj) * t_dim1]; /* L80: */ } } jt += *nb; ja += commtrb_1.jtz; /* L90: */ } jrm = *jlt - ja; if (jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__2 = mr; for (k = 1; k <= i__2; ++k) { a[ia + k + (ja + jj) * a_dim1] = t[it + k + (jt + jj) * t_dim1]; /* L100: */ } } } } } 
/* A <== T + A */ } else { /* If NPROW = 1, use DAXPY */ /* same traversal as the copy branch, with HPL_daxpy (factor 1.0) or += in place of the plain copy */ if (*nrow == 1) { jt = 0; ja = 0; i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__1 = *nb; for (jj = 1; jj <= i__1; ++jj) { i__4 = Mmin(*mb,*ilt); HPL_daxpy(i__4, 1.0, &t[(jt + jj) * t_dim1 + 1], 1, &a[(ja + jj) * a_dim1 + 1], 1); /* L110: */ } jt += *nb; ja += commtrb_1.jtz; /* L120: */ } jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__1 = Mmin(*mb,*ilt); HPL_daxpy(i__1, 1.0, &t[(jt + jj) * t_dim1 + 1], 1, & a[(ja + jj) * a_dim1 + 1], 1); /* L130: */ } } } else { i__2 = *nrow - 2; for (ki = 0; ki <= i__2; ++ki) { jt = 0; ja = 0; i__1 = *ncol - 2; for (kj = 0; kj <= i__1; ++kj) { i__4 = *nb; for (jj = 1; jj <= i__4; ++jj) { i__3 = *mb; for (k = 1; k <= i__3; ++k) { a[ia + k + (ja + jj) * a_dim1] += t[it + k + (jt + jj) * t_dim1]; /* L140: */ } } jt += *nb; ja += commtrb_1.jtz; /* L150: */ } jrm = *jlt - ja; if (jrm > 0) { i__1 = Mmin(*nb,jrm); for (jj = 1; jj <= i__1; ++jj) { i__3 = *mb; for (k = 1; k <= i__3; ++k) { a[ia + k + (ja + jj) * a_dim1] += t[it + k + (jt + jj) * t_dim1]; /* L160: */ } } } it += *mb; ia += commtrb_1.itz; /* L170: */ } /* last block row of the accumulating branch */ irm = *ilt - ia; if (irm > 0) { jt = 0; ja = 0; mr = Mmin(*mb,irm); i__2 = *ncol - 2; for (kj = 0; kj <= i__2; ++kj) { i__3 = *nb; for (jj = 1; jj <= i__3; ++jj) { i__1 = mr; for (k = 1; k <= i__1; ++k) { a[ia + k + (ja + jj) * a_dim1] += t[it + k + (jt + jj) * t_dim1]; /* L180: */ } } jt += *nb; ja += commtrb_1.jtz; /* L190: */ } jrm = *jlt - ja; if (jrm > 0) { i__2 = Mmin(*nb,jrm); for (jj = 1; jj <= i__2; ++jj) { i__1 = mr; for (k = 1; k <= i__1; ++k) { a[ia + k + (ja + jj) * a_dim1] += t[it + k + (jt + jj) * t_dim1]; /* L200: */ } } } } } } /* the return value is always 0 */ return 0; } /* dmv2mx_ */ int pdtrans(char *trans, int *m, int *n, int * mb, int *nb, double *a, int *lda, double *beta, double *c__, int *ldc, int *imrow, int *imcol, double *work, int *iwork) { /* System generated locals */ long a_dim1, a_offset, c_dim1, c_offset; int i__1, i__2, 
i__3, i__4; /* Local variables */ int j1, k1, k2, ml, nl, mp, mq, np, nq, mb0, mb1, mb2, nb0, nb1, nb2, kia, kja, kic, kjc, lbm, lbn, lcm, ldt, lbm0, lbm1, lbm2, lbn0, lbn1, lbn2, igcd; long ipt; int mcol, info, lcmp, lcmq, item, ncol, kmod1, kmod2; double tbeta; int kpcol, mpcol, npcol, mrcol, mycol, kprow, mprow, nprow, mrrow, myrow; /* -- PUMMA Package routine (version 2.1) -- */ /* Jaeyoung Choi, Oak Ridge National Laboratory. */ /* Jack Dongarra, Univ. of Tennessee, Oak Ridge National Laboratory. */ /* David Walker, Oak Ridge National Laboratory. */ /* October 31, 1994. */ /* Purpose */ /* PDTRANS routine is one of the PUMMA package based on block cyclic */ /* data distribution on 2-D process configuration. */ /* It is used for the following matrix transposition, */ /* Form C := A' + beta*C */ /* where beta is a scalar, and A and C are matrices, with A an M by N */ /* matrix (globally), and C an N by M matrix (globally). */ /* Parameters */ /* TRANS - (input) CHARACTER*1 */ /* TRANS specifies whether A is transposed or conjugate */ /* transposed. */ /* TRANS = 'T', transpose; */ /* TRANS = 'C', conjugate transpose. */ /* M - (input) INTEGER */ /* M specifies the (global) number of rows of the matrix A and */ /* the (global) number of rows of the matrix C. M >= 0. */ /* N - (input) INTEGER */ /* N specifies the (global) number of columns of the matrix A */ /* and columns of the matrix B. N >= 0. */ /* MB - (input) INTEGER */ /* MB specifies the row block size of the matrix A and the */ /* column block of the matrix C. MB >= 1. */ /* NB - (input) INTEGER */ /* NB specifies the column block size of the matrix A and the */ /* row block size of the matrix C. NB >= 1. */ /* A - (input) DOUBLE PRECISION array of DIMENSION ( LDA, Nq ). */ /* The leading Mp by Nq part of the array A must contain the */ /* local matrix A. Mp and Nq are local variables */ /* (see description of local parameters). 
*/ /* LDA - (input) INTEGER */ /* The leading dimension of the (local) array A. */ /* LDA >= MAX( 1, Mp ). */ /* BETA - (input) DOUBLE PRECISION */ /* BETA specifies the scalar beta. When BETA is supplied as */ /* zero then C need not be set on input. */ /* C - (input/output) DOUBLE PRECISION array of DIMENSION (LDC, Mq). */ /* On entry the leading Np by Mq part of the array C must */ /* contain the local matrix C, except when beta is zero, */ /* in which case C need not be set on entry. */ /* On exit, the array C is overwritten by the Np by Mq matrix */ /* (A'+beta*C). Np and Mq are local variables */ /* (see description of local parameters). */ /* LDC - (input) INTEGER */ /* The leading dimension of the (local) array C. */ /* LDC >= MAX( 1, Np ). */ /* IMROW - (input) INTEGER */ /* IMROW specifies a row of the process template, which holds */ /* the first block of the matrices. 0 <= IMROW < NPROW. */ /* IMCOL - (input) INTEGER */ /* IMCOL specifies a column of the process template, which */ /* holds the first block of the matrices. 0 <= IMCOL < NPCOL. */ /* WORK - (workspace) DOUBLE PRECISION array */ /* See requirements. */ /* IWORK - (workspace) INTEGER array */ /* See requirements. */ /* Local Parameters */ /* LCM = the lowest common multiple of P and Q */ /* LCMP = LCM/P = number of template rows in LCM block */ /* LCMQ = LCM/Q = number of template columns in LCM block */ /* IGCD = the greatest common divisor (GCD) of P and Q */ /* MpxNq = size of (local) matrix A in the process, iam */ /* NpxMq = size of (local) matrix C in the process, iam */ /* KMOD = Define Group I.D. 
*/ /* item = temporal integer parameter */ /* Two buffers for storing A' and T(= subblock of A') */ /* WORK <== A' */ /* WORK(IPT) <== T */ /* Three interger buffers */ /* IWORK(1,k) <== starting point of row subblock of A to send and */ /* C to receive in K2 loop (rowwise communication) */ /* IWORK(2,k) <== starting point of column subblock of A to send in */ /* J1 loop (columnwise communication) */ /* IWORK(3,k) <== starting point of column subblock of C to receive */ /* in J1 loop (columnwise communication) */ /* Requirements (approximate) */ /* Size(IWORK) = 3 x MAX(P, Q) */ /* Size(WORK) = 2 x Ceil(Ceil(M,MB),LCM)xMB x Ceil(Ceil(N,NB),LCM)xNB */ /* Get grid parameters */ /* Parameter adjustments */ a_dim1 = *lda; a_offset = 1 + a_dim1; a -= a_offset; c_dim1 = *ldc; c_offset = 1 + c_dim1; c__ -= c_offset; --work; --iwork; /* Function Body */ Cblacs_gridinfo(context_1.ictxt, &nprow, &npcol, &myrow, &mycol); /* Test for the input parameters. */ info = 0; if (*trans != 'T' && *trans != 'C') { info = 1; } else if (*m < 0) { info = 2; } else if (*n < 0) { info = 3; } else if (*mb < 1) { info = 4; } else if (*nb < 1) { info = 5; } else if (*lda < 1) { info = 7; } else if (*ldc < 1) { info = 10; } else if (*imrow < 0 || *imrow >= nprow) { info = 11; } else if (*imcol < 0 || *imcol >= npcol) { info = 12; } L10: if (info != 0) { pxerbla( &context_1.ictxt, "PDTRANS", &info ); return 0; } /* Initialize parameters */ mprow = nprow + myrow; mpcol = npcol + mycol; mrrow = (mprow - *imrow) % nprow; mrcol = (mpcol - *imcol) % npcol; lcm = ilcm_(&nprow, &npcol); lcmp = lcm / nprow; lcmq = lcm / npcol; igcd = nprow / lcmq; mp = numroc_(m, mb, &mrrow, &c__0, &nprow); mq = numroc_(m, mb, &mrcol, &c__0, &npcol); np = numroc_(n, nb, &mrrow, &c__0, &nprow); nq = numroc_(n, nb, &mrcol, &c__0, &npcol); i__1 = iceil_(m, mb); lbm = iceil_(&i__1, &lcm); i__1 = iceil_(n, nb); lbn = iceil_(&i__1, &lcm); /* Test for the input parameters again with local parameters */ if (*lda < mp) { info = 
7; } else if (*ldc < np) { info = 10; } if (info != 0) { goto L10; } /* Quick return if possible. */ if (*m == 0 || *n == 0) { return 0; } /* At first, scale C with beta if beta != 0.0 & beta != 1.0 */ tbeta = *beta; if (*beta != 0. && *beta != 1.) { i__1 = mq; for (j1 = 1; j1 <= i__1; ++j1) { HPL_dscal( np, *beta, &c__[j1 * c_dim1 + 1], 1 ); /* L20: */ } tbeta = 1.; } commtrb_1.iaz = lcmp * *mb; commtrb_1.jaz = lcmq * *nb; commtrb_1.itz = lcmp * *nb; commtrb_1.jtz = lcmq * *mb; ml = lbm * *mb; nl = lbn * *nb; ipt = (long)ml * (long)nl + 1; ldt = nl; kprow = mrrow + nprow; kpcol = mrcol + npcol; /* Initialize Parameters -- Compute the positions of subblocks */ i__1 = npcol - 1; for (k1 = 0; k1 <= i__1; ++k1) { ncol = (kpcol - k1) % npcol; i__2 = lcmq - 1; for (j1 = 0; j1 <= i__2; ++j1) { item = npcol * j1 + ncol; if (item % nprow == mrrow) { iwork[ncol * 3 + 1] = item / nprow; } /* L30: */ } } i__2 = lcmq - 1; for (j1 = 0; j1 <= i__2; ++j1) { item = (npcol * j1 + mrcol) % nprow; iwork[item * 3 + 2] = j1; iwork[item * 3 + 3] = j1; i__1 = igcd - 1; for (k1 = 1; k1 <= i__1; ++k1) { iwork[(item + nprow - k1) % nprow * 3 + 2] = j1; iwork[(item + k1) % nprow * 3 + 3] = j1; /* L40: */ } } /* Set parameters for efficient copying */ lbm0 = lbm; lbm1 = lbm; lbm2 = lbm; lbn0 = lbn; lbn1 = lbn; lbn2 = lbn; mb0 = *mb; mb1 = *mb; mb2 = *mb; nb0 = *nb; nb1 = *nb; nb2 = *nb; if (nprow == npcol) { lbm0 = 1; lbn0 = 1; mb0 = mp; nb0 = nq; } if (nprow == lcm) { lbm1 = 1; lbn2 = 1; mb1 = mp; nb2 = np; } if (npcol == lcm) { lbn1 = 1; lbm2 = 1; nb1 = nq; mb2 = mq; } /* For each K2 loop (rowwise), Copy A' to WORK & Send it to KTPROC */ /* then, Receive WORK and Copy WORK to C */ kmod1 = (nprow + mrcol - mrrow) % igcd; kmod2 = (igcd - kmod1) % igcd; i__1 = lcmp - 1; for (k2 = 0; k2 <= i__1; ++k2) { /* Copy A' to WORK in the appropriate order & Send it */ k1 = k2 * igcd + kmod1; mcol = (kpcol - k1) % npcol; kia = iwork[mcol * 3 + 1] * *mb; mcol = (mcol + *imcol) % npcol; ncol = (mrcol + k2 
* igcd + kmod2) % npcol; kic = iwork[ncol * 3 + 1] * *nb; ncol = (ncol + *imcol) % npcol; i__2 = lcmq - 1; for (j1 = 0; j1 <= i__2; ++j1) { kja = iwork[(mrrow + igcd * j1) % nprow * 3 + 2] * *nb; if (myrow == (myrow + igcd * j1 + kmod1) % nprow && mycol == mcol) { kjc = iwork[(kprow - igcd * j1) % nprow * 3 + 3] * *mb; i__3 = mp - kia; i__4 = nq - kja; dtr2mx_(&a[kia + 1 + (kja + 1) * a_dim1], lda, &tbeta, &c__[ kic + 1 + (kjc + 1) * c_dim1], ldc, &lbm0, &lbn0, & mb0, &nb0, &i__3, &i__4); } else { i__3 = mp - kia; i__4 = nq - kja; dtr2bf_(&a[kia + 1 + (kja + 1) * a_dim1], lda, &work[1], &ldt, &lbm1, &lbn1, &mb1, &nb1, &i__3, &i__4); if (nprow == npcol && *beta == 0. && *ldc == ldt) { i__3 = (myrow + igcd * j1 + kmod1) % nprow; i__4 = (mprow - igcd * j1 - kmod2) % nprow; kjc = iwork[(kprow - igcd * j1) % nprow * 3 + 3] * *mb; #if 0 Cdgesd2d(context_1.ictxt,nl,ml,&work[1],nl,i__3,mcol); Cdgerv2d(context_1.ictxt,nl,ml,&c__[(kjc + 1) * c_dim1 + 1],*ldc,i__4,ncol); #else Cblacs_dSendrecv( context_1.ictxt, nl, ml, &work[1], nl, i__3, mcol, nl, ml, &c__[(kjc + 1) * c_dim1 + 1], *ldc, i__4, ncol ); #endif } else { i__3 = (myrow + igcd * j1 + kmod1) % nprow; i__4 = (mprow - igcd * j1 - kmod2) % nprow; #if 0 Cdgesd2d(context_1.ictxt,nl,ml,&work[1],nl,i__3,mcol); Cdgerv2d(context_1.ictxt,nl,ml,&work[ipt],nl, i__4,ncol); #else Cblacs_dSendrecv( context_1.ictxt, nl, ml, &work[1], nl, i__3, mcol, nl, ml, &work[ipt], nl, i__4, ncol ); #endif kjc = iwork[(kprow - igcd * j1) % nprow * 3 + 3] * *mb; i__3 = np - kic; i__4 = mq - kjc; dmv2mx_(&work[ipt], &ldt, &tbeta, &c__[kic + 1 + (kjc + 1) * c_dim1], ldc, &lbn2, &lbm2, &nb2, &mb2, &i__3, &i__4); } } } } return 0; } /* pdtrans_ */ hpcc-1.4.1/PTRANS/pdtransdriver.c0000644000000000000000000003612011256503657013377 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* pdtransdriver.c -- PUMMA Package routine (version 2.1) -- Jaeyoung Choi, Oak Ridge National Laboratory. Jack Dongarra, Univ. 
of Tennessee, Oak Ridge National Laboratory. David Walker, Oak Ridge National Laboratory. March 26, 1995. Purpose: Driver routine for testing the full matrix transpose. */ #include #include "cblacslt.h" /* Common Block Declarations */ struct { int ictxt; } context_; #define context_1 context_ /* Table of constant values */ static int c__1 = 1; static int c__0 = 0; static void param_dump(FILE *outFile, char *name, int n, int *vals) { int j; fprintf( outFile, "%s:", name ); for (j = 0; j < n; ++j) fprintf( outFile, " %d", vals[j] ); fprintf( outFile, "\n" ); } static void param_illegal(int iam, FILE *outFile, char *fmt, char *contxt, char *val_name, int x) { if (0 != iam) return; if (val_name[0]) fprintf( outFile, fmt, contxt, val_name, x ); else fprintf( outFile, fmt, contxt ); fprintf( outFile, "\n" ); } static void param_allred_sum(int *ierr) { int success; MPI_Allreduce( ierr, &success, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); ierr[0] = success; } static void grid_map(int np_me, int npall, int nprow, int npcol, int seed, int *umap) { int i, j, k, rval[2], rmul[2], radd[2]; if (seed < 0) seed = -seed; rval[1] = (seed >> 16) & 32767; rval[0] = seed & 65535; rmul[0] = 20077; rmul[1] = 16838; radd[0] = 12345; radd[1] = 0; setran_( rval, rmul, radd ); pdrand(); for (i = 0; i < npall; ++i) umap[i] = i; for (i = 0; i < npall; ++i) { j = pdrand() * npall; /* swap entries i and j */ k = umap[j]; umap[j] = umap[i]; umap[i] = k; } } int PTRANS(HPCC_Params *params) { /* calculation of passed/failed/skipped tests assumes that MPI rank 0 is 0x0 in CBLACS */ int ktests = 0; int kpass = 0; int kfail = 0; int kskip = 0; int i__, j, m, n; int mb, nb, ii, mg, ng, mp, mq, np, nq; int mp0, mq0, np0, nq0, lda, ldc, iam, lcm; double eps, *mem; int *imem; long ipa, ipc, ipw, ipiw, isw; int nmat, *mval, ierr[1], *nval; int nbmat, *mbval, imcol, *nbval; double ctime[2], resid, resid0 = 1.0; int npcol, *npval, mycol, *nqval; double wtime[2]; int imrow, nprow, myrow, iaseed = 100, proc_seed; 
char *passed; int ngrids; double thresh; int nprocs; FILE *outFile; double curGBs, curGBs_0, cpuGBs, *GBs; int AllocSuccessful, grid_cnt, r0x0, r0_ingrid; int icseed = 200; double d_One = 1.0; long dMemSize, li; GBs = ¶ms->PTRANSrdata.GBs; *GBs = curGBs = 0.0; Cblacs_pinfo(&iam, &nprocs); if (0 == iam) { outFile = fopen( params->outFname, "a" ); if (! outFile) outFile = stderr; } else outFile = stderr; nmat = params->PTRANSns; mval = params->PTRANSnval; nval = params->PTRANSnval; nbmat = params->PTRANSnbs; mbval = params->PTRANSnbval; nbval = params->PTRANSnbval; ngrids = params->PTRANSnpqs; npval = params->PTRANSpval; nqval = params->PTRANSqval; thresh = params->test.thrsh; eps = params->test.epsil; imrow = imcol = 0; /* calculate and allocate memory */ AllocSuccessful = 0; MaxMem( nprocs, imrow, imcol, nmat, mval, nval, nbmat, mbval, nbval, ngrids, npval, nqval, &dMemSize ); mem = NULL; imem = NULL; if (dMemSize > 0) { mem = HPCC_XMALLOC( double, dMemSize ); imem = HPCC_XMALLOC( int, (3 * nprocs) ); if (mem && imem) AllocSuccessful = 1; } MPI_Allreduce( &AllocSuccessful, ierr, 1, MPI_INT, MPI_MIN, MPI_COMM_WORLD ); if (ierr[0] < 1) { if (imem) HPCC_free(imem); if (mem) HPCC_free(mem); if (0 == iam) fprintf( outFile, "Failed to allocate %ld doubles\n", dMemSize ); goto mem_failure; } /* initialize working arrays; it is necessary because on some systems it will contain NaNs * (Not a Number) and NaTs (Not a Thing) and this makes pdmatgen() work incorrectly * (0.0 * NaN may cause exception) */ for (li = 0; li < dMemSize; li++) mem[li] = 0.0; for (j = 0; j < 3 * nprocs; j++) imem[j] = 0; /* Print headings */ if (0 == iam) { /* matrix sizes */ param_dump( outFile, "M", nmat, mval ); param_dump( outFile, "N", nmat, nval ); /* block sizes */ param_dump( outFile, "MB", nbmat, mbval ); param_dump( outFile, "NB", nbmat, nbval ); /* process grids */ param_dump( outFile, "P", ngrids, npval ); param_dump( outFile, "Q", ngrids, nqval ); fprintf( outFile, "TIME M N MB NB P Q 
TIME CHECK GB/s RESID\n" "---- ----- ----- --- --- --- --- -------- ------ -------- -----\n" ); fflush( outFile ); } /* Loop over different process grids */ for (j = 0; j < ngrids; ++j) { nprow = npval[j]; npcol = nqval[j]; /* Make sure grid information is correct */ ierr[0] = 0; if (nprow < 1) { param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "GRID", "nprow", nprow ); ierr[0] = 1; } else if (npcol < 1) { param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "GRID", "npcol", npcol ); ierr[0] = 1; } else if (nprow * npcol > nprocs) { param_illegal( iam, outFile, "ILLEGAL %s: %s = %d. Too many processes requested.", "GRID", "nprow*npcol-nprocs", nprow * npcol - nprocs ); ierr[0] = 1; } param_allred_sum( ierr ); if (ierr[0] > 0) { param_illegal( iam, outFile, "Bad %s parameters: going on to next test case.", "grid", "", 0 ); ++kskip; continue; } for (i__ = 0; i__ < nmat; ++i__) { m = mval[i__]; n = nval[i__]; /* Make sure matrix information is correct */ ierr[0] = 0; if (m < 1) { param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "MATRIX", "M", m ); ierr[0] = 1; } else if (n < 1) { param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "MATRIX", "N", n ); ierr[0] = 1; } /* Make sure no one had error */ param_allred_sum( ierr ); if (ierr[0] > 0) { param_illegal( iam, outFile, "Bad %s parameters: going on to next test case.", "MATRIX", "", 0 ); ++kskip; continue; } /* Loop over different block sizes */ for (ii = 1; ii <= nbmat; ++ii) { mb = mbval[ii - 1]; nb = nbval[ii - 1]; /* Make sure blocking sizes are legal */ ierr[0] = 0; if (mb < 1) { ierr[0] = 1; param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "MB", "MB", mb ); } else if (nb < 1) { ierr[0] = 1; param_illegal( iam, outFile, "ILLEGAL %s: %s = %d; It should be at least 1", "NB", "NB", nb ); } /* Make sure no one had error */ param_allred_sum( ierr ); if (ierr[0] > 0) { param_illegal( iam, 
outFile, "Bad %s parameters: going on to next test case.", "NB", "", 0 ); ++kskip; continue; } for (grid_cnt = 0; grid_cnt < 5; ++grid_cnt) { /* Make sure all processes have the same seed */ mp = (int)time(NULL); MPI_Allreduce( &mp, &proc_seed, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); /* Define process grid */ Cblacs_get(-1, 0, &context_1.ictxt); grid_map( iam, nprocs, nprow, npcol, proc_seed, imem ); Cblacs_gridmap( &context_1.ictxt, imem, npcol, nprow, npcol ); Cblacs_gridinfo(context_1.ictxt, &nprow, &npcol, &myrow, &mycol); /* Make sure all processes know who's 0x0 */ mp = (0 == myrow && 0 == mycol) ? iam : 0; MPI_Allreduce( &mp, &r0x0, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); r0_ingrid = 1; /* Go to bottom of process grid loop if this case doesn't use my process */ if (myrow >= nprow || mycol >= npcol) { /* nprow and npcol were lost in the call to Cblacs_gridinfo */ nprow = npval[j]; npcol = nqval[j]; /* reporting must be done on process 0 */ if (0 != iam) continue; r0_ingrid = 0; goto report; } mp = numroc_(&m, &mb, &myrow, &imrow, &nprow); mq = numroc_(&m, &mb, &mycol, &imcol, &npcol); np = numroc_(&n, &nb, &myrow, &imrow, &nprow); nq = numroc_(&n, &nb, &mycol, &imcol, &npcol); mg = iceil_(&m, &mb); ng = iceil_(&n, &nb); mp0 = iceil_(&mg, &nprow) * mb; mq0 = iceil_(&mg, &npcol) * mb; np0 = iceil_(&ng, &nprow) * nb; nq0 = iceil_(&ng, &npcol) * nb; lcm = ilcm_(&nprow, &npcol); ipc = 1; ipa = ipc + (long)np0 * (long)mq0; ipiw = (long)mp0 * (long)nq0 + ipa; ipw = ipiw; isw = ipw + (long)(iceil_(&mg, &lcm) << 1) * (long)mb * (long)iceil_(&ng, &lcm) * (long)nb; /* Make sure have enough memory to handle problem */ if (isw > dMemSize) { param_illegal( iam, outFile, "Unable to perform %s: need %s of at least %d thousand doubles\n", "PTRANS", "memory", (int)((isw + 999)/ 1000) ); ierr[0] = 1; } /* Make sure no one had error */ Cigsum2d(context_1.ictxt,"a","h",1,1,ierr, 1,-1,0); if (ierr[0] > 0) { param_illegal( iam, outFile, "Bad %s parameters: going on to next test 
case.", "MEMORY", "", 0 ); ++kskip; continue; } /* Generate matrix A */ lda = Mmax(1,mp); /* A = rand(m, n, iaseed) */ pdmatgen(&context_1.ictxt, "N", "N", &m, &n, &mb, &nb, &mem[ipa - 1], &lda, &imrow, &imcol, &iaseed, &c__0, &mp, &c__0, &nq, &myrow, &mycol, &nprow, &npcol, 0.0); /* C = rand(n, m, icseed) */ pdmatgen(&context_1.ictxt, "T", "N", &n, &m, &nb, &mb, &mem[ipc - 1], &lda, &imrow, &imcol, &icseed, &c__0, &np, &c__0, &mq, &myrow, &mycol, &nprow, &npcol, 0.0); slboot_(); Cblacs_barrier(context_1.ictxt, "All"); sltimer_(&c__1); /* Perform the matrix transpose */ ldc = Mmax(1,np); /* C := A' + d_One * C */ pdtrans( "T", &m, &n, &mb, &nb, &mem[ipa - 1], &lda, &d_One, &mem[ipc - 1], &ldc, &imrow, &imcol, &mem[ipw - 1], imem ); sltimer_(&c__1); if (thresh > 0.0) { /* Regenerate matrix A in transpose form (A') */ lda = Mmax(1,np); /* A = rand(n, m, icseed) */ pdmatgen( &context_1.ictxt, "T", "N", &n, &m, &nb, &mb, &mem[ipa - 1], &lda, &imrow, &imcol, &icseed, &c__0, &np, &c__0, &mq, &myrow, &mycol, &nprow, &npcol, 0.0); /* A += rand(m, n, iaseed) */ pdmatgen( &context_1.ictxt, "T", "N", &m, &n, &mb, &nb, &mem[ipa - 1], &lda, &imrow, &imcol, &iaseed, &c__0, &mp, &c__0, &nq, &myrow, &mycol, &nprow, &npcol, 1.0); /* Compare A' to C */ pdmatcmp(&context_1.ictxt, &np, &mq, &mem[ipa - 1], &lda, &mem[ipc - 1], &ldc, &resid); resid0 = resid; resid /= eps * Mmax( m, n ); if (resid <= thresh && resid - resid == 0.0) { /* if `resid' is small and is not NaN */ ++kpass; passed = "PASSED"; } else { ++kfail; passed = "FAILED"; } } else { /* Don't perform the checking, only the timing operation */ ++kpass; resid -= resid; passed = "BYPASS"; } /* Gather maximum of all CPU and WALL clock timings */ slcombine_(&context_1.ictxt, "All", ">", "W", &c__1, &c__1, wtime); slcombine_(&context_1.ictxt, "All", ">", "C", &c__1, &c__1, ctime); Cblacs_gridexit(context_1.ictxt); report: if (0 != r0x0) { double dva[3]; MPI_Status status; if (r0x0 == iam) { dva[0] = wtime[0]; dva[1] = ctime[0]; 
dva[2] = passed[0]; MPI_Send( dva, 3, MPI_DOUBLE, 0, 0, MPI_COMM_WORLD ); } if (0 == iam) { MPI_Recv( dva, 3, MPI_DOUBLE, r0x0, 0, MPI_COMM_WORLD, &status ); if (! r0_ingrid) { /* if 0's process not in grid, timing and pass/fail info is missing */ wtime[0] = dva[0]; ctime[0] = dva[1]; switch ((int)(dva[2])) { case 'F': passed = "FAILED"; ++kfail; break; case 'B': passed = "BYPASS"; ++kpass; break; default: passed = "PASSED"; ++kpass; break; } } } } /* Print results */ if (0 == iam) { /* Print WALL time if machine supports it */ if (wtime[0] > 0.0) { curGBs_0 = 1e-9 / wtime[0] * m * n * sizeof(double); if (0 == grid_cnt) curGBs = curGBs_0; if (curGBs > curGBs_0) /* take minimum performance */ curGBs = curGBs_0; fprintf( outFile, "WALL %5d %5d %3d %3d %3d %3d %8.2f %s %8.3f %5.2f\n", m, n, mb, nb, nprow, npcol, wtime[0], passed, curGBs, resid ); } /* Print CPU time if machine supports it */ if (ctime[0] > 0.0) { cpuGBs = 1e-9 / ctime[0] * m * n * sizeof(double); fprintf( outFile, "CPU %5d %5d %3d %3d %3d %3d %8.2f %s %8.3f %5.2f\n", m, n, mb, nb, nprow, npcol, ctime[0], passed, cpuGBs, resid ); } } } if (0 == iam && curGBs > *GBs) { *GBs = curGBs; params->PTRANSrdata.time = wtime[0]; params->PTRANSrdata.residual = resid0; params->PTRANSrdata.n = n; params->PTRANSrdata.nb = nb; params->PTRANSrdata.nprow = nprow; params->PTRANSrdata.npcol = npcol; } } } } if (imem) HPCC_free( imem ); if (mem) HPCC_free( mem ); mem_failure: /* Print out ending messages and close output file */ if (0 == iam) { ktests = kpass + kfail + kskip; fprintf( outFile, "\nFinished %4d tests, with the following results:\n", ktests ); if (thresh > 0.0) { fprintf( outFile, "%5d tests completed and passed residual checks.\n", kpass ); fprintf( outFile, "%5d tests completed and failed residual checks.\n", kfail ); } else { fprintf( outFile, "%5d tests completed without checking.\n", kpass ); } fprintf( outFile, "%5d tests skipped because of illegal input values.\n", kskip ); fprintf( outFile, "\nEND OF 
TESTS.\n" ); if (outFile != stdout && outFile != stderr) fclose( outFile ); } Cblacs_exit(1); /* if at least one test failed or was skipped then it's a total failure */ MPI_Reduce( &kfail, &ktests, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD ); if (ktests) params->Failure = 1; MPI_Reduce( &kskip, &ktests, 1, MPI_INT, MPI_MAX, 0, MPI_COMM_WORLD ); if (ktests) params->Failure = 1; return 0; } hpcc-1.4.1/PTRANS/pmatgeninc.c0000644000000000000000000000444711256503657012644 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ #include static void PTRANS_ladd(int *J, int *K, int *I) { int itmp0 = K[0] + J[0], itmp1; itmp1 = itmp0 >> 16; I[0] = itmp0 - ( itmp1 << 16 ); itmp0 = itmp1 + K[1] + J[1]; I[1] = itmp0 - (( itmp0 >> 15 ) << 15); } static void PTRANS_lmul(int *K, int *J, int *I) { static int ipow30 = ( 1 << 30 ); int kt, lt; kt = K[0] * J[0]; if( kt < 0 ) kt = ( kt + ipow30 ) + ipow30; I[0] = kt - ( ( kt >> 16 ) << 16 ); lt = K[0] * J[1] + K[1] * J[0]; if( lt < 0 ) lt = ( lt + ipow30 ) + ipow30; kt = ( kt >> 16 ) + lt; if( kt < 0 ) kt = ( kt + ipow30 ) + ipow30; I[1] = kt - ( ( kt >> 15 ) << 15 ); } static struct { int irand[2], ias[2], ics[2]; } rancom_; #define rancom_1 rancom_ /* -- ScaLAPACK routines (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. 
*/ /* May 1, 1997 */ int xjumpm_(int *jumpm, int *mult, int *iadd, int *irann, int *iranm, int *iam, int *icm) { int i__1; int i, j[2]; if (*jumpm > 0) { for (i = 0; i < 2; ++i) { iam[i] = mult[i]; icm[i] = iadd[i]; } i__1 = *jumpm - 1; for (i = 0; i < i__1; ++i) { PTRANS_lmul( iam, mult, j); iam[0] = j[0]; iam[1] = j[1]; PTRANS_lmul( icm, mult, j ); PTRANS_ladd( iadd, j, icm ); } PTRANS_lmul( irann, iam, j ); PTRANS_ladd( j, icm, iranm ); } else { iranm[0] = irann[0]; iranm[1] = irann[1]; } return 0; } /* xjumpm_ */ int setran_(int *iran, int *ia, int *ic) { int i; for (i = 0; i < 2; ++i) { rancom_1.irand[i] = iran[i]; rancom_1.ias[i] = ia[i]; rancom_1.ics[i] = ic[i]; } return 0; } /* setran_ */ int jumpit_(int *mult, int *iadd, int *irann, int *iranm) { int j[2]; PTRANS_lmul( irann, mult, j); PTRANS_ladd( j, iadd, iranm ); rancom_1.irand[0] = iranm[0]; rancom_1.irand[1] = iranm[1]; return 0; } /* jumpit_ */ double pdrand() { /* System generated locals */ double ret_val; /* Local variables */ int j[2]; ret_val = ((double) rancom_1.irand[0] + (double) rancom_1.irand[1] * 65536.0) / 2147483648.0; PTRANS_lmul(rancom_1.irand, rancom_1.ias, j); PTRANS_ladd(j, rancom_1.ics, rancom_1.irand); return ret_val; } /* pdrand */ hpcc-1.4.1/PTRANS/sclapack.c0000644000000000000000000002511711256503657012275 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ #include #include "cblacslt.h" /* Common Block Declarations */ struct { double cpusec[64], wallsec[64], cpustart[64], wallstart[64]; int disabled; } sltimer00_; #define sltimer00_1 sltimer00_ /* Table of constant values */ static int c__1 = 1; static int c_n1 = -1; static int c__0 = 0; int iceil_(int *inum, int *idenom) { /* System generated locals */ int ret_val; /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. 
*/ /* May 1, 1997 */ /* Purpose */ /* ICEIL returns the ceiling of the division of two integers. */ /* Arguments */ /* INUM (local input) INTEGER */ /* The numerator, */ /* IDENOM (local input) INTEGER */ /* and the denominator of the fraction to be evaluated. */ ret_val = (*inum + *idenom - 1) / *idenom; return ret_val; } /* iceil_ */ int numroc_(int *n, int *nb, int *iproc, int *isrcproc, int *nprocs) { /* System generated locals */ int ret_val; /* Local variables */ int extrablks, mydist, nblocks; /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* May 1, 1997 */ /* Purpose */ /* NUMROC computes the NUMber of Rows Or Columns of a distributed */ /* matrix owned by the process indicated by IPROC. */ /* Arguments */ /* N (global input) INTEGER */ /* The number of rows/columns in distributed matrix. */ /* NB (global input) INTEGER */ /* Block size, size of the blocks the distributed matrix is */ /* split into. */ /* IPROC (local input) INTEGER */ /* The coordinate of the process whose local array row or */ /* column is to be determined. */ /* ISRCPROC (global input) INTEGER */ /* The coordinate of the process that possesses the first */ /* row or column of the distributed matrix. */ /* NPROCS (global input) INTEGER */ /* The total number processes over which the matrix is */ /* distributed. 
*/ /* Figure PROC's distance from source process */ mydist = (*nprocs + *iproc - *isrcproc) % *nprocs; /* Figure the total number of whole NB blocks N is split up into */ nblocks = *n / *nb; /* Figure the minimum number of rows/cols a process can have */ ret_val = nblocks / *nprocs * *nb; /* See if there are any extra blocks */ extrablks = nblocks % *nprocs; /* If I have an extra block */ if (mydist < extrablks) { ret_val += *nb; /* If I have last block, it may be a partial block */ } else if (mydist == extrablks) { ret_val += *n % *nb; } return ret_val; } /* numroc_ */ int ilcm_(int *m, int *n) { /* System generated locals */ int ret_val; /* Local variables */ int ia, iq, ir; /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* May 1, 1997 */ /* Purpose */ /* ILCM computes and returns the Least Common Multiple (LCM) of two */ /* positive integers M and N. In fact the routine computes the greatest */ /* common divisor (GCD) and use the fact that M*N = GCD*LCM. */ /* Arguments */ /* M (input) INTEGER */ /* On entry, M >=0. Unchanged on exit. */ /* N (input) INTEGER */ /* On entry, N >=0. Unchanged on exit. */ if (*m >= *n) { ia = *m; ret_val = *n; } else { ia = *n; ret_val = *m; } for (;;) { iq = ia / ret_val; ir = ia - iq * ret_val; if (ir == 0) { ret_val = *m * *n / ret_val; return ret_val; } ia = ret_val; ret_val = ir; } } /* ilcm_ */ int pxerbla(int *ictxt, char *srname, int *info) { /* Format strings */ char fmt_9999[] = "{ %5d, %5d }: On entry " "to %s() parameter number %4d had an illegal value" "\n"; int npcol, mycol, nprow, myrow; /* -- ScaLAPACK auxiliary routine (version 2.0) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* April 1, 1998 */ /* Purpose */ /* PXERBLA is an error handler for the ScaLAPACK routines. 
It is called */ /* by a ScaLAPACK routine if an input parameter has an invalid value. A */ /* message is printed. Installers may consider modifying this routine in */ /* order to call system-specific exception-handling facilities. */ /* Arguments */ /* ICTXT (local input) INTEGER */ /* On entry, ICTXT specifies the BLACS context handle, indica- */ /* ting the global context of the operation. The context itself */ /* is global, but the value of ICTXT is local. */ /* SRNAME (global input) CHARACTER*(*) */ /* On entry, SRNAME specifies the name of the routine which cal- */ /* ling PXERBLA. */ /* INFO (global input) INTEGER */ /* On entry, INFO specifies the position of the invalid parame- */ /* ter in the parameter list of the calling routine. */ /* -- Written on April 1, 1998 by */ /* Antoine Petitet, University of Tennessee, Knoxville 37996, USA. */ Cblacs_gridinfo(*ictxt, &nprow, &npcol, &myrow, &mycol); printf( fmt_9999, myrow, mycol, srname, *info ); fflush( stdout ); return 0; } /* pxerbla */ int slboot_(void) { static int i__; /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* May 1, 1997 */ /* Purpose */ /* SLBOOT (re)sets all timers to 0, and enables SLtimer. */ sltimer00_1.disabled = 0; for (i__ = 1; i__ <= 64; ++i__) { sltimer00_1.cpusec[i__ - 1] = 0.; sltimer00_1.wallsec[i__ - 1] = 0.; sltimer00_1.cpustart[i__ - 1] = -5.; sltimer00_1.wallstart[i__ - 1] = -5.; /* L10: */ } return 0; } /* slboot_ */ int sltimer_(int *i__) { /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* May 1, 1997 */ /* Purpose */ /* SLtimer provides a "stopwatch" functionality cpu/wall timer */ /* (in seconds). Up to 64 separate timers can be functioning at once. */ /* The first call starts the timer, and the second stops it. 
This */ /* routine can be disenabled, so that calls to the timer are ignored. */ /* This feature can be used to make sure certain sections of code do */ /* not affect timings, even if they call routines which have SLtimer */ /* calls in them. */ /* Arguments */ /* I (global input) INTEGER */ /* The timer to stop/start. */ /* If timing disabled, return */ if (sltimer00_1.disabled) { return 0; } if (sltimer00_1.wallstart[*i__ - 1] == -5.) { /* If timer has not been started, start it */ sltimer00_1.wallstart[*i__ - 1] = dwalltime00(); sltimer00_1.cpustart[*i__ - 1] = dcputime00(); } else { /* Stop timer and add interval to count */ sltimer00_1.cpusec[*i__ - 1] = sltimer00_1.cpusec[*i__ - 1] + dcputime00() - sltimer00_1.cpustart[*i__ - 1]; sltimer00_1.wallsec[*i__ - 1] = sltimer00_1.wallsec[*i__ - 1] + dwalltime00() - sltimer00_1.wallstart[*i__ - 1]; sltimer00_1.wallstart[*i__ - 1] = -5.; } return 0; } /* sltimer_ */ int slcombine_(int *ictxt, char *scope, char *op, char * timetype, int *n, int *ibeg, double *times) { /* System generated locals */ int i__1; /* Local variables */ static int i__; static int tmpdis; /* -- ScaLAPACK tools routine (version 1.7) -- */ /* University of Tennessee, Knoxville, Oak Ridge National Laboratory, */ /* and University of California, Berkeley. */ /* May 1, 1997 */ /* Purpose */ /* SLCOMBINE takes the timing information stored on a scope of processes */ /* and combines them into the user's TIMES array. */ /* Arguments */ /* ICTXT (local input) INTEGER */ /* The BLACS context handle. */ /* SCOPE (global input) CHARACTER */ /* Controls what processes in grid participate in combine. */ /* Options are 'Rowwise', 'Columnwise', or 'All'. */ /* OP (global input) CHARACTER */ /* Controls what combine should be done: */ /* = '>': get maximal time on any process (default), */ /* = '<': get minimal time on any process, */ /* = '+': get sum of times across processes. 
*/ /* TIMETYPE (global input) CHARACTER */ /* Controls what time will be returned in TIMES: */ /* = 'W': wall clock time, */ /* = 'C': CPU time (default). */ /* N (global input) INTEGER */ /* The number of timers to combine. */ /* IBEG (global input) INTEGER */ /* The first timer to be combined. */ /* TIMES (global output) DOUBLE PRECISION array, dimension (N) */ /* The requested timing information is returned in this array. */ /* Disable timer for combine operation */ /* Parameter adjustments */ --times; /* Function Body */ tmpdis = sltimer00_1.disabled; sltimer00_1.disabled = 1; /* Copy timer information into user's times array */ if (*timetype == 'W') { /* If walltime not available on this machine, fill in times */ /* with -1 flag, and return */ if (dwalltime00() == -1.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { times[i__] = -1.; /* L10: */ } return 0; } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { times[i__] = sltimer00_1.wallsec[*ibeg + i__ - 2]; /* L20: */ } } } else { if (dcputime00() == -1.) { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { times[i__] = -1.; /* L30: */ } return 0; } else { i__1 = *n; for (i__ = 1; i__ <= i__1; ++i__) { times[i__] = sltimer00_1.cpusec[*ibeg + i__ - 2]; /* L40: */ } } } /* Combine all nodes' information, restore disabled, and return */ if (*(unsigned char *)op == '>') { Cdgamx2d(*ictxt,scope," ",*n,c__1,×[1],*n,&c_n1,&c_n1,c_n1,c_n1,c__0); } else if (*(unsigned char *)op == '<') { Cdgamn2d(*ictxt,scope," ",*n,c__1,×[1],*n,&c_n1,&c_n1,c_n1,c_n1,c__0); } else if (*(unsigned char *)op == '+') { Cdgsum2d(*ictxt,scope," ",*n,c__1,×[1],*n,c_n1,c__0); } else { Cdgamx2d(*ictxt,scope," ",*n,c__1,×[1],*n,&c_n1,&c_n1,c_n1,c_n1,c__0); } sltimer00_1.disabled = tmpdis; return 0; } /* slcombine_ */ hpcc-1.4.1/README.html0000644000000000000000000005367711403764057011175 00000000000000 DARPA/DOE HPC Challenge Benchmark version 1.4.1

DARPA/DOE HPC Challenge Benchmark version 1.4.1

Piotr Luszczek*

1  Introduction

This is a suite of benchmarks that measure the performance of the processor, the memory subsystem, and the interconnect. For details refer to the HPC Challenge web site (http://icl.cs.utk.edu/hpcc/).

In essence, HPC Challenge consists of a number of subbenchmarks each of which tests a different aspect of the system.

If you are familiar with the High Performance Linpack (HPL) benchmark code (see the HPL web site: http://www.netlib.org/benchmark/hpl/) then you can reuse the build script file (input for make(1) command) and the input file that you already have for HPL. The HPC Challenge benchmark includes HPL and uses its build script and input files with only slight modifications. The most important change must be done to the line that sets the TOPdir variable. For HPC Challenge, the variable’s value should always be ../../.. regardless of what it was in the HPL build script file.

2  Compiling

The first step is to create a build script file that reflects characteristics of your machine. This file is reused by all the components of the HPC Challenge suite. The build script file should be created in the hpl directory. This directory contains instructions (the files README and INSTALL) on how to create the build script file for your system. The hpl/setup directory contains many examples of build script files. A recommended approach is to copy one of them to the hpl directory and if it doesn’t work then change it.

The build script file has a name that starts with the Make. prefix and usually ends with a suffix that identifies the target system. For example, if the suffix chosen for the system is Unix, the file should be named Make.Unix.

To build the benchmark executable (for the system named Unix) type: make arch=Unix. This command should be run in the top directory (not in the hpl directory). It will look in the hpl directory for the build script file and use it to build the benchmark executable.

The runtime behavior of the HPC Challenge source code may be configured at compile time by defining a few C preprocessor symbols. They can be defined by adding appropriate options to the CCNOOPT and CCFLAGS make variables. The former controls options for source code files that need to be compiled without aggressive optimizations to ensure accurate generation of system-specific parameters. The latter applies to the rest of the files that need good compiler optimization for best performance. To define a symbol S, the majority of compilers require option -DS to be used. Currently, the following options are available in the HPC Challenge source code:

  • HPCC_FFT_235: if this symbol is defined the FFTE code (an FFT implementation) will use vector sizes and processor counts that are not limited to powers of 2. Instead, the vector sizes and processor counts to be used will be a product of powers of 2, 3, and 5.
  • HPCC_FFTW_ESTIMATE: if this symbol is defined it will affect the way the external FFTW library is called (it does not have any effect if the FFTW library is not used). When this symbol is defined, the FFTW planning routine will be called with the FFTW_ESTIMATE flag (instead of FFTW_MEASURE). This might result in worse performance results but shorter execution time of the benchmark. Defining this symbol may also positively affect the memory fragmentation caused by the FFTW’s planning routine.
  • HPCC_MEMALLCTR: if this symbol is defined a custom memory allocator will be used to alleviate effects of memory fragmentation and allow for larger data sets to be used which may result in obtaining better performance.
  • HPL_USE_GETPROCESSTIMES: if this symbol is defined then Windows-specific GetProcessTimes() function will be used to measure the elapsed CPU time.
  • USE_MULTIPLE_RECV: if this symbol is defined then multiple non-blocking receives will be posted simultaneously. By default only one non-blocking receive is posted.
  • RA_SANDIA_NOPT: if this symbol is defined the HPC Challenge standard algorithm for Global RandomAccess will not be used. Instead, an alternative implementation from Sandia National Laboratory will be used. It routes messages in software across virtual hyper-cube topology formed from MPI processes.
  • RA_SANDIA_OPT2: if this symbol is defined the HPC Challenge standard algorithm for Global RandomAccess will not be used. Instead, an alternative implementation from Sandia National Laboratory will be used. This implementation is optimized for the number of processors being a power of two. The optimizations are sorting of data before sending and unrolling the data update loop. If the number of processes is not a power of two then the code is the same as the one performed with the RA_SANDIA_NOPT setting.
  • USING_FFTW: if this symbol is defined the standard HPC Challenge FFT implementation (called FFTE) will not be used. Instead, the FFTW library will be called. Defining the USING_FFTW symbol is not sufficient: appropriate flags have to be added in the make script so that FFTW header files can be found at compile time and the FFTW libraries at link time.

3  Runtime Configuration

The HPC Challenge is driven by a short input file named hpccinf.txt that is almost the same as the input file for HPL (customarily called HPL.dat). Refer to the file hpl/www/tuning.html for details about the input file for HPL. A sample input file is included with the HPC Challenge distribution.

The differences between HPL’s input file and HPC Challenge’s input file can be summarized as follows:

  • Lines 3 and 4 are ignored. The output is always appended to the file named hpccoutf.txt.
  • There are additional lines (starting with line 33) that may (but do not have to) be used to customize the HPC Challenge benchmark. They are described below.

The additional lines in the HPC Challenge input file (compared to the HPL input file) are:

  • Lines 33 and 34 describe additional matrix sizes to be used for running the PTRANS benchmark (one of the components of the HPC Challenge benchmark).
  • Lines 35 and 36 describe additional blocking factors to be used for running the PTRANS test.

Just for completeness, here is the list of lines of the HPC Challenge’s input file and brief description of their meaning:

  • Line 1: ignored
  • Line 2: ignored
  • Line 3: ignored
  • Line 4: ignored
  • Line 5: number of matrix sizes for HPL (and PTRANS)
  • Line 6: matrix sizes for HPL (and PTRANS)
  • Line 7: number of blocking factors for HPL (and PTRANS)
  • Line 8: blocking factors for HPL (and PTRANS)
  • Line 9: type of process ordering for HPL
  • Line 10: number of process grids for HPL (and PTRANS)
  • Line 11: numbers of process rows of each process grid for HPL (and PTRANS)
  • Line 12: numbers of process columns of each process grid for HPL (and PTRANS)
  • Line 13: threshold value not to be exceeded by scaled residual for HPL (and PTRANS)
  • Line 14: number of panel factorization methods for HPL
  • Line 15: panel factorization methods for HPL
  • Line 16: number of recursive stopping criteria for HPL
  • Line 17: recursive stopping criteria for HPL
  • Line 18: number of recursion panel counts for HPL
  • Line 19: recursion panel counts for HPL
  • Line 20: number of recursive panel factorization methods for HPL
  • Line 21: recursive panel factorization methods for HPL
  • Line 22: number of broadcast methods for HPL
  • Line 23: broadcast methods for HPL
  • Line 24: number of look-ahead depths for HPL
  • Line 25: look-ahead depths for HPL
  • Line 26: swap methods for HPL
  • Line 27: swapping threshold for HPL
  • Line 28: form of L1 for HPL
  • Line 29: form of U for HPL
  • Line 30: value that specifies whether equilibration should be used by HPL
  • Line 31: memory alignment for HPL
  • Line 32: ignored
  • Line 33: number of additional problem sizes for PTRANS
  • Line 34: additional problem sizes for PTRANS
  • Line 35: number of additional blocking factors for PTRANS
  • Line 36: additional blocking factors for PTRANS

4  Running

The exact way to run the HPC Challenge benchmark depends on the MPI implementation and system details. An example command to run the benchmark could look like this: mpirun -np 4 hpcc. The meaning of the command’s components is as follows:

  • mpirun is the command that starts execution of an MPI code. Depending on the system, it might also be aprun, mpiexec, mprun, poe, or something appropriate for your computer.
  • -np 4 is the argument that specifies that 4 MPI processes should be started. The number of MPI processes should be large enough to accommodate all the process grids specified in the hpccinf.txt file.
  • hpcc is the name of the HPC Challenge executable to run.

After the run, a file called hpccoutf.txt is created. It contains results of the benchmark. This file should be uploaded through the web form at the HPC Challenge website.

5  Source Code Changes across Versions (ChangeLog)

5.1  Version 1.4.1 (2010-06-01)

  1. Added optimized variants of RandomAccess that use Linear Congruential Generator for random number generation.
  2. Made corrections to comments that provide definition of the RandomAccess test.
  3. Removed initialization of the main array from the timed section of optimized versions of RandomAccess.
  4. Fixed the length of the vector used to compute error when using MPI implementation from FFTW.
  5. Added global reduction to error calculation to achieve more accurate error estimate.
  6. Updated documentation in README.

5.2  Version 1.4.0 (2010-03-26)

  1. Added new variant of RandomAccess that uses Linear Congruential Generator for random number generation.
  2. Rearranged the order of benchmarks so that HPL component runs last and may be aborted if the performance of other components was not satisfactory. RandomAccess is now first to assist in tuning the code.
  3. Added global initialization and finalization routine that allows to properly initialize and finalize external software and hardware components without changing the rest of the HPCC testing harness.
  4. Lack of hpccinf.txt is no longer reported as error but as a warning.

5.3  Version 1.3.2 (2009-03-24)

  1. Fixed memory leaks in G-RandomAccess driver routine.
  2. Made the check for 32-bit vector sizes in G-FFT optional. MKL allows for 64-bit vector sizes in its FFTW wrapper.
  3. Fixed memory bug in single-process FFT.
  4. Updated documentation (README).

5.4  Version 1.3.1 (2008-12-09)

  1. Fixed a dead-lock problem in FFT component due to use of wrong communicator.
  2. Fixed the 32-bit random number generator in PTRANS that was using 64-bit routines from HPL.

5.5  Version 1.3.0 (2008-11-13)

  1. Updated HPL component to use HPL 2.0 source code
    1. Replaced 32-bit Pseudo Random Number Generator (PRNG) with a 64-bit one.
    2. Replaced 3 numerical checks of the solution residual with a single one.
    3. Added support for 64-bit systems with large memory sizes (before, they would overflow 32-bit integers during index calculations).
  2. Introduced a limit on FFT vector size so they fit in a 32-bit integer (only applicable when using FFTW version 2.)

5.6  Version 1.2.0 (2007-06-25)

  1. Changes in the FFT component:
    1. Added flexibility in choosing vector sizes and processor counts: now the code can do powers of 2, 3, and 5 both sequentially and in parallel tests.
    2. FFTW can now run with the ESTIMATE (not just MEASURE) flag: it might produce worse performance results but often reduces time to run the test and causes less memory fragmentation.
  2. Changes in the DGEMM component:
    1. Added more comprehensive checking of the numerical properties of the test’s results.
  3. Changes in the RandomAccess component:
    1. Removed time-bound functionality: only runs that perform complete computation are now possible.
    2. Made the timing more accurate: main array initialization is not counted towards performance timing.
    3. Cleaned up the code: some non-portable C language constructs have been removed.
    4. Added new algorithms: new algorithms from Sandia based on hypercube network topology can now be chosen at compile time, which results in much better performance on many types of parallel systems.
    5. Fixed potential resource leaks by adding function calls required by the MPI standard.
  4. Changes in the HPL component:
    1. Cleaned up reporting of numerics: more accurate printing of scaled residual formula.
  5. Changes in the PTRANS component:
    1. Added randomization of virtual process grids to measure bandwidth of the network more accurately.
  6. Miscellaneous changes:
    1. Added better support for Windows-based clusters by taking advantage of Win32 API.
    2. Added custom memory allocator to deal with memory fragmentation on some systems.
    3. Added better reporting of configuration options in the output file.

5.7  Version 1.0.0 (2005-06-11)

5.8  Version 0.8beta (2004-10-19)

5.9  Version 0.8alpha (2004-10-15)

5.10  Version 0.6beta (2004-08-21)

5.11  Version 0.6alpha (2004-05-31)

5.12  Version 0.5beta (2003-12-01)

5.13  Version 0.4alpha (2003-11-13)

5.14  Version 0.3alpha (2004-11-05)


*
University of Tennessee Knoxville, Innovative Computing Laboratory

This document was translated from LATEX by HEVEA.
hpcc-1.4.1/README.txt0000644000000000000000000003523011403764124011024 00000000000000 DARPA/DOE HPC Challenge Benchmark version 1.4.1 *********************************************** Piotr Luszczek (1) ================== 1 Introduction *=*=*=*=*=*=*=* This is a suite of benchmarks that measure performance of processor, memory subsytem, and the interconnect. For details refer to the HPC Challenge web site (http://icl.cs.utk.edu/hpcc/.) In essence, HPC Challenge consists of a number of subbenchmarks each of which tests a different aspect of the system. If you are familiar with the High Performance Linpack (HPL) benchmark code (see the HPL web site: http://www.netlib.org/benchmark/hpl/) then you can reuse the build script file (input for make(1) command) and the input file that you already have for HPL. The HPC Challenge benchmark includes HPL and uses its build script and input files with only slight modifications. The most important change must be done to the line that sets the TOPdir variable. For HPC Challenge, the variable's value should always be ../../.. regardless of what it was in the HPL build script file. 2 Compiling *=*=*=*=*=*= The first step is to create a build script file that reflects characteristics of your machine. This file is reused by all the components of the HPC Challenge suite. The build script file should be created in the hpl directory. This directory contains instructions (the files README and INSTALL) on how to create the build script file for your system. The hpl/setup directory contains many examples of build script files. A recommended approach is to copy one of them to the hpl directory and if it doesn't work then change it. The build script file has a name that starts with Make. prefix and usally ends with a suffix that identifies the target system. For example, if the suffix chosen for the system is Unix, the file should be named Make.Unix. To build the benchmark executable (for the system named Unix) type: make arch=Unix. 
This command should be run in the top directory (not in the hpl directory). It will look in the hpl directory for the build script file and use it to build the benchmark executable. The runtime behavior of the HPC Challenge source code may be configured at compile time by defining a few C preprocessor symbols. They can be defined by adding appropriate options to CCNOOPT and CCFLAGS make variables. The former controls options for source code files that need to be compiled without aggressive optimizations to ensure accurate generation of system-specific parameters. The latter applies to the rest of the files that need good compiler optimization for best performance. To define a symbol S, the majority of compilers requires option -DS to be used. Currently, the following options are available in the HPC Challenge source code: - HPCC_FFT_235: if this symbol is defined the FFTE code (an FFT implementation) will use vector sizes and processor counts that are not limited to powers of 2. Instead, the vector sizes and processor counts to be used will be a product of powers of 2, 3, and 5. - HPCC_FFTW_ESTIMATE: if this symbol is defined it will affect the way external FFTW library is called (it does not have any effect if the FFTW library is not used). When defined, this symbol will call the FFTW planning routine with FFTW_ESTIMATE flag (instead of FFTW_MEASURE). This might result in worse performance results but shorter execution time of the benchmark. Defining this symbol may also positively affect the memory fragmentation caused by the FFTW's planning routine. - HPCC_MEMALLCTR: if this symbol is defined a custom memory allocator will be used to alleviate effects of memory fragmentation and allow for larger data sets to be used which may result in obtaining better performance. - HPL_USE_GETPROCESSTIMES: if this symbol is defined then Windows-specific GetProcessTimes() function will be used to measure the elapsed CPU time. 
- USE_MULTIPLE_RECV: if this symbol is defined then multiple non-blocking receives will be posted simultaneously. By default only one non-blocking receive is posted. - RA_SANDIA_NOPT: if this symbol is defined the HPC Challenge standard algorithm for Global RandomAccess will not be used. Instead, an alternative implementation from Sandia National Laboratory will be used. It routes messages in software across virtual hyper-cube topology formed from MPI processes. - RA_SANDIA_OPT2: if this symbol is defined the HPC Challenge standard algorithm for Global RandomAccess will not be used. Instead, an alternative implementation from Sandia National Laboratory will be used. This implementation is optimized for number of processors being powers of two. The optimizations are sorting of data before sending and unrolling the data update loop. If the number of processes is not a power of two then the code is the same as the one performed with the RA_SANDIA_NOPT setting. - USING_FFTW: if this symbol is defined the standard HPC Challenge FFT implementation (called FFTE) will not be used. Instead, FFTW library will be called. Defining the USING_FFTW symbol is not sufficient: appropriate flags have to be added in the make script so that FFTW header files can be found at compile time and the FFTW libraries at link time. 3 Runtime Configuration *=*=*=*=*=*=*=*=*=*=*=*= The HPC Challenge is driven by a short input file named hpccinf.txt that is almost the same as the input file for HPL (customarily called HPL.dat). Refer to the file hpl/www/tuning.html for details about the input file for HPL. A sample input file is included with the HPC Challenge distribution. The differences between HPL's input file and HPC Challenge's input file can be summarized as follows: - Lines 3 and 4 are ignored. The output is always appended to the file named hpccoutf.txt. 
- There are additional lines (starting with line 33) that may (but do not have to) be used to customize the HPC Challenge benchmark. They are described below. The additional lines in the HPC Challenge input file (compared to the HPL input file) are: - Lines 33 and 34 describe additional matrix sizes to be used for running the PTRANS benchmark (one of the components of the HPC Challenge benchmark). - Lines 35 and 36 describe additional blocking factors to be used for running the PTRANS test. Just for completeness, here is the list of lines of the HPC Challenge's input file and brief description of their meaning: - Line 1: ignored - Line 2: ignored - Line 3: ignored - Line 4: ignored - Line 5: number of matrix sizes for HPL (and PTRANS) - Line 6: matrix sizes for HPL (and PTRANS) - Line 7: number of blocking factors for HPL (and PTRANS) - Line 8: blocking factors for HPL (and PTRANS) - Line 9: type of process ordering for HPL - Line 10: number of process grids for HPL (and PTRANS) - Line 11: numbers of process rows of each process grid for HPL (and PTRANS) - Line 12: numbers of process columns of each process grid for HPL (and PTRANS) - Line 13: threshold value not to be exceeded by scaled residual for HPL (and PTRANS) - Line 14: number of panel factorization methods for HPL - Line 15: panel factorization methods for HPL - Line 16: number of recursive stopping criteria for HPL - Line 17: recursive stopping criteria for HPL - Line 18: number of recursion panel counts for HPL - Line 19: recursion panel counts for HPL - Line 20: number of recursive panel factorization methods for HPL - Line 21: recursive panel factorization methods for HPL - Line 22: number of broadcast methods for HPL - Line 23: broadcast methods for HPL - Line 24: number of look-ahead depths for HPL - Line 25: look-ahead depths for HPL - Line 26: swap methods for HPL - Line 27: swapping threshold for HPL - Line 28: form of L1 for HPL - Line 29: form of U for HPL - Line 30: value that specifies whether 
equilibration should be used by HPL - Line 31: memory alignment for HPL - Line 32: ignored - Line 33: number of additional problem sizes for PTRANS - Line 34: additional problem sizes for PTRANS - Line 35: number of additional blocking factors for PTRANS - Line 36: additional blocking factors for PTRANS 4 Running *=*=*=*=*= The exact way to run the HPC Challenge benchmark depends on the MPI implementation and system details. An example command to run the benchmark could look like this: mpirun -np 4 hpcc. The meaning of the command's components is as follows: - mpirun is the command that starts execution of an MPI code. Depending on the system, it might also be aprun, mpiexec, mprun, poe, or something appropriate for your computer. - -np 4 is the argument that specifies that 4 MPI processes should be started. The number of MPI processes should be large enough to accommodate all the process grids specified in the hpccinf.txt file. - hpcc is the name of the HPC Challenge executable to run. After the run, a file called hpccoutf.txt is created. It contains results of the benchmark. This file should be uploaded through the web form at the HPC Challenge website. 5 Source Code Changes across Versions (ChangeLog) *=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= 5.1 Version 1.4.1 (2010-06-01) =============================== 1. Added optimized variants of RandomAccess that use Linear Congruential Generator for random number generation. 2. Made corrections to comments that provide definition of the RandomAccess test. 3. Removed initialization of the main array from the timed section of optimized versions of RandomAccess. 4. Fixed the length of the vector used to compute error when using MPI implementation from FFTW. 5. Added global reduction to error calculation to achieve more accurate error estimate. 6. Updated documentation in README. 5.2 Version 1.4.0 (2010-03-26) =============================== 1. 
Added new variant of RandomAccess that uses Linear Congruential Generator for random number generation. 2. Rearranged the order of benchmarks so that HPL component runs last and may be aborted if the performance of other components was not satisfactory. RandomAccess is now first to assist in tuning the code. 3. Added global initialization and finalization routine that allows to properly initialize and finalize external software and hardware components without changing the rest of the HPCC testing harness. 4. Lack of hpccinf.txt is no longer reported as error but as a warning. 5.3 Version 1.3.2 (2009-03-24) =============================== 1. Fixed memory leaks in G-RandomAccess driver routine. 2. Made the check for 32-bit vector sizes in G-FFT optional. MKL allows for 64-bit vector sizes in its FFTW wrapper. 3. Fixed memory bug in single-process FFT. 4. Updated documentation (README). 5.4 Version 1.3.1 (2008-12-09) =============================== 1. Fixed a dead-lock problem in FFT component due to use of wrong communicator. 2. Fixed the 32-bit random number generator in PTRANS that was using 64-bit routines from HPL. 5.5 Version 1.3.0 (2008-11-13) =============================== 1. Updated HPL component to use HPL 2.0 source code 1. Replaced 32-bit Pseudo Random Number Generator (PRNG) with a 64-bit one. 2. Replaced 3 numerical checks of the solution residual with a single one. 3. Added support for 64-bit systems with large memory sizes (before, they would overflow 32-bit integers during index calculations). 2. Introduced a limit on FFT vector size so they fit in a 32-bit integer (only applicable when using FFTW version 2.) 5.6 Version 1.2.0 (2007-06-25) =============================== 1. Changes in the FFT component: 1. Added flexibility in choosing vector sizes and processor counts: now the code can do powers of 2, 3, and 5 both sequentially and in parallel tests. 2. 
FFTW can now run with ESTIMATE (not just MEASURE) flag: it might produce worse performance results but often reduces time to run the test and causes less memory fragmentation. 2. Changes in the DGEMM component: 1. Added more comprehensive checking of the numerical properties of the test's results. 3. Changes in the RandomAccess component: 1. Removed time-bound functionality: only runs that perform complete computation are now possible. 2. Made the timing more accurate: main array initialization is not counted towards performance timing. 3. Cleaned up the code: some non-portable C language constructs have been removed. 4. Added new algorithms: new algorithms from Sandia based on hypercube network topology can now be chosen at compile time which results in much better performance on many types of parallel systems. 5. Fixed potential resource leaks by adding function calls required by the MPI standard. 4. Changes in the HPL component: 1. Cleaned up reporting of numerics: more accurate printing of scaled residual formula. 5. Changes in the PTRANS component: 1. Added randomization of virtual process grids to measure bandwidth of the network more accurately. 6. Miscellaneous changes: 1. Added better support for Windows-based clusters by taking advantage of Win32 API. 2. Added custom memory allocator to deal with memory fragmentation on some systems. 3. Added better reporting of configuration options in the output file. 
5.7 Version 1.0.0 (2005-06-11) =============================== 5.8 Version 0.8beta (2004-10-19) ================================= 5.9 Version 0.8alpha (2004-10-15) ================================== 5.10 Version 0.6beta (2004-08-21) ================================== 5.11 Version 0.6alpha (2004-05-31) =================================== 5.12 Version 0.5beta (2003-12-01) ================================== 5.13 Version 0.4alpha (2003-11-13) =================================== 5.14 Version 0.3alpha (2004-11-05) =================================== ----------------------------------------------------------------------- This document was translated from LaTeX by HeVeA (2). ----------------------------------- (1) University of Tennessee Knoxville, Innovative Computing Laboratory (2) http://hevea.inria.fr/index.html hpcc-1.4.1/RandomAccess/MPIRandomAccess.c0000644000000000000000000010267311353467335014762 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. * * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK are limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). 
The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer. A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * are permitted. * * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. 
The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors. In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. 
* * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications possible because processor location and local * offset can be determined by applying masks to the input stream values. * The non power of two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark,and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison). * * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size. 
*/ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" /* Allocate main table (in global memory) */ u64Int *HPCC_Table; u64Int LocalSendBuffer[LOCAL_BUFFER_SIZE]; u64Int LocalRecvBuffer[MAX_RECV*LOCAL_BUFFER_SIZE]; #ifndef LONG_IS_64BITS static void Sum64(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype) { int i, n = *len; s64Int *invec64 = (s64Int *)invec, *inoutvec64 = (s64Int *)inoutvec; for (i = n; i; i--, invec64++, inoutvec64++) *inoutvec64 += *invec64; } #endif #ifdef HPCC_RA_STDALG void AnyNodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { s64Int i, j; int proc_count; s64Int SendCnt; u64Int Ran; s64Int WhichPe; u64Int GlobalOffset, LocalOffset; int NumberReceiving = NumProcs - 1; #ifdef USE_MULTIPLE_RECV int index, NumRecvs; MPI_Request inreq[MAX_RECV] = { MPI_REQUEST_NULL }; MPI_Request outreq = MPI_REQUEST_NULL; #else MPI_Request inreq, outreq = MPI_REQUEST_NULL; #endif u64Int inmsg; int bufferBase; MPI_Status status; int have_done; int pe; int pendingUpdates; int maxPendingUpdates; int localBufferSize; int peUpdates; int recvUpdates; Bucket_Ptr Buckets; pendingUpdates = 0; maxPendingUpdates = MAX_TOTAL_PENDING_UPDATES; localBufferSize = LOCAL_BUFFER_SIZE; Buckets = HPCC_InitBuckets(NumProcs, maxPendingUpdates); /* Perform updates to main table. The scalar equivalent is: * * u64Int Ran; * Ran = 1; * for (i=0; i 4) ? 
(Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... */ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? 
POLY : ZERO64B); GlobalOffset = Ran & (TableSize-1); if ( GlobalOffset < Top) WhichPe = ( GlobalOffset / (MinLocalTableSize + 1) ); else WhichPe = ( (GlobalOffset - Remainder) / MinLocalTableSize ); if (WhichPe == MyProc) { LocalOffset = (Ran & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send remaining updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } MPI_Waitall( NumProcs, finish_req, finish_statuses); /* Be nice and clean up after ourselves */ HPCC_FreeBuckets(Buckets, NumProcs); #ifdef USE_MULTIPLE_RECV for (j = 0; j < NumRecvs; j++) { MPI_Cancel(&inreq[j]); MPI_Wait(&inreq[j], MPI_STATUS_IGNORE); } #else MPI_Cancel(&inreq); MPI_Wait(&inreq, MPI_STATUS_IGNORE); #endif MPI_Wait(&outreq, MPI_STATUS_IGNORE); /* end multiprocessor code */ } void Power2NodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { s64Int i, j; int proc_count; s64Int SendCnt; u64Int Ran; s64Int WhichPe; u64Int LocalOffset; int logLocalTableSize = logTableSize - logNumProcs; int NumberReceiving = NumProcs - 1; #ifdef USE_MULTIPLE_RECV int index, NumRecvs; MPI_Request inreq[MAX_RECV] = { MPI_REQUEST_NULL }; MPI_Request outreq = MPI_REQUEST_NULL; #else MPI_Request inreq, outreq = MPI_REQUEST_NULL; #endif u64Int inmsg; int bufferBase; MPI_Status status; int have_done; int pe; int pendingUpdates; int maxPendingUpdates; int localBufferSize; int peUpdates; int recvUpdates; Bucket_Ptr Buckets; pendingUpdates = 0; maxPendingUpdates = MAX_TOTAL_PENDING_UPDATES; localBufferSize = LOCAL_BUFFER_SIZE; Buckets = HPCC_InitBuckets(NumProcs, maxPendingUpdates); /* Perform updates to main table. The scalar equivalent is: * * u64Int Ran; * Ran = 1; * for (i=0; i 4) ? 
(Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[inmsg & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... */ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? 
POLY : ZERO64B); WhichPe = (Ran >> logLocalTableSize) & (NumProcs - 1); if (WhichPe == MyProc) { LocalOffset = (Ran & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send remaining updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[inmsg & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[inmsg & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } MPI_Waitall( NumProcs, finish_req, finish_statuses); /* Be nice and clean up after ourselves */ HPCC_FreeBuckets(Buckets, NumProcs); #ifdef USE_MULTIPLE_RECV for (j = 0; j < NumRecvs; j++) { MPI_Cancel(&inreq[j]); MPI_Wait(&inreq[j], MPI_STATUS_IGNORE); } #else MPI_Cancel(&inreq); MPI_Wait(&inreq, MPI_STATUS_IGNORE); #endif MPI_Wait(&outreq, MPI_STATUS_IGNORE); /* end multiprocessor code */ } #endif int HPCC_MPIRandomAccess(HPCC_Params *params) { s64Int i; s64Int NumErrors, GlbNumErrors; int NumProcs, logNumProcs, MyProc; u64Int GlobalStartMyProc; int Remainder; /* Number of processors with (LocalTableSize + 1) entries */ u64Int Top; /* Number of table entries in top of Table */ s64Int LocalTableSize; /* Local table width */ u64Int MinLocalTableSize; /* Integer ratio TableSize/NumProcs */ u64Int logTableSize, TableSize; double CPUTime; /* CPU time to update table */ double RealTime; /* Real time to update table */ double TotalMem; int sAbort, rAbort; int PowerofTwo; double timeBound = -1; /* OPTIONAL time bound for execution time */ u64Int NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */ u64Int NumUpdates; /* actual number of updates to table - may be smaller than * NumUpdates_Default due to execution time bounds */ s64Int ProcNumUpdates; /* number of updates per processor */ #ifdef RA_TIME_BOUND s64Int GlbNumUpdates; /* for reduction */ #endif FILE *outFile = NULL; MPI_Op sum64; double *GUPs; MPI_Datatype INT64_DT; MPI_Status *finish_statuses; MPI_Request *finish_req; #ifdef LONG_IS_64BITS INT64_DT = MPI_LONG; #else INT64_DT = MPI_LONG_LONG_INT; #endif GUPs = 
¶ms->MPIRandomAccess_GUPs; MPI_Comm_size( MPI_COMM_WORLD, &NumProcs ); MPI_Comm_rank( MPI_COMM_WORLD, &MyProc ); if (0 == MyProc) { outFile = fopen( params->outFname, "a" ); if (! outFile) outFile = stderr; } TotalMem = params->HPLMaxProcMem; /* max single node memory */ TotalMem *= NumProcs; /* max memory in NumProcs nodes */ TotalMem /= sizeof(u64Int); /* calculate TableSize --- the size of update array (must be a power of 2) */ for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1; TotalMem >= 1.0; TotalMem *= 0.5, logTableSize++, TableSize <<= 1) ; /* EMPTY */ /* determine whether the number of processors is a power of 2 */ for (i = 1, logNumProcs = 0; ; logNumProcs++, i <<= 1) { if (i == NumProcs) { PowerofTwo = HPCC_TRUE; Remainder = 0; Top = 0; MinLocalTableSize = (TableSize / NumProcs); LocalTableSize = MinLocalTableSize; GlobalStartMyProc = (MinLocalTableSize * MyProc); break; /* number of processes is not a power 2 (too many shifts may introduce negative values or 0) */ } else if (i > NumProcs || i <= 0) { PowerofTwo = HPCC_FALSE; /* Minimum local table size --- some processors have an additional entry */ MinLocalTableSize = (TableSize / NumProcs); /* Number of processors with (LocalTableSize + 1) entries */ Remainder = TableSize - (MinLocalTableSize * NumProcs); /* Number of table entries in top of Table */ Top = (MinLocalTableSize + 1) * Remainder; /* Local table size */ if (MyProc < Remainder) { LocalTableSize = (MinLocalTableSize + 1); GlobalStartMyProc = ( (MinLocalTableSize + 1) * MyProc); } else { LocalTableSize = MinLocalTableSize; GlobalStartMyProc = ( (MinLocalTableSize * MyProc) + Remainder ); } break; } /* end else if */ } /* end for i */ sAbort = 0; finish_statuses = XMALLOC( MPI_Status, NumProcs ); finish_req = XMALLOC( MPI_Request, NumProcs ); HPCC_Table = HPCC_XMALLOC( u64Int, LocalTableSize ); if (! finish_statuses || ! finish_req || ! 
HPCC_Table) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(outFile, "Failed to allocate memory for the main table.\n"); /* check all allocations in case there are new added and their order changes */ if (finish_statuses) free( finish_statuses ); if (finish_req) free( finish_req ); if (HPCC_Table) HPCC_free( HPCC_Table ); goto failed_table; } params->MPIRandomAccess_N = (s64Int)TableSize; /* Default number of global updates to table: 4x number of table entries */ NumUpdates_Default = 4 * TableSize; ProcNumUpdates = 4*LocalTableSize; NumUpdates = NumUpdates_Default; /* The time bound is only accurate for standard RandomAccess algorithm. */ #ifdef HPCC_RA_STDALG #ifdef RA_TIME_BOUND /* estimate number of updates such that execution time does not exceed time bound */ /* time_bound should be a parameter */ /* max run time in seconds */ MPI_Allreduce( ¶ms->HPLrdata.time, &timeBound, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); timeBound = Mmax( 0.25 * timeBound, (double)TIME_BOUND ); if (PowerofTwo) { HPCC_Power2NodesTime(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, INT64_DT, timeBound, (u64Int *)&ProcNumUpdates, finish_statuses, finish_req); } else { HPCC_AnyNodesTime(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, INT64_DT, timeBound, (u64Int *)&ProcNumUpdates, finish_statuses, finish_req); } /* be conservative: get the smallest number of updates among all procs */ MPI_Reduce( &ProcNumUpdates, &GlbNumUpdates, 1, INT64_DT, MPI_MIN, 0, MPI_COMM_WORLD ); /* distribute number of updates per proc to all procs */ MPI_Bcast( &GlbNumUpdates, 1, INT64_DT, 0, MPI_COMM_WORLD ); ProcNumUpdates = Mmin(GlbNumUpdates, (4*LocalTableSize)); /* works for both PowerofTwo and AnyNodes */ NumUpdates = Mmin((ProcNumUpdates*NumProcs), 
(s64Int)NumUpdates_Default); #endif #endif if (MyProc == 0) { fprintf( outFile, "Running on %d processors%s\n", NumProcs, PowerofTwo ? " (PowerofTwo)" : ""); fprintf( outFile, "Total Main table size = 2^" FSTR64 " = " FSTR64 " words\n", logTableSize, TableSize ); if (PowerofTwo) fprintf( outFile, "PE Main table size = 2^" FSTR64 " = " FSTR64 " words/PE\n", (logTableSize - logNumProcs), TableSize/NumProcs ); else fprintf( outFile, "PE Main table size = (2^" FSTR64 ")/%d = " FSTR64 " words/PE MAX\n", logTableSize, NumProcs, LocalTableSize); fprintf( outFile, "Default number of updates (RECOMMENDED) = " FSTR64 "\n", NumUpdates_Default); #ifdef RA_TIME_BOUND fprintf( outFile, "Number of updates EXECUTED = " FSTR64 " (for a TIME BOUND of %.2f secs)\n", NumUpdates, timeBound); #endif params->MPIRandomAccess_ExeUpdates = NumUpdates; params->MPIRandomAccess_TimeBound = timeBound; } /* Initialize main table */ for (i=0; iMPIRandomAccess_time = RealTime; *GUPs = 1e-9*NumUpdates / RealTime; fprintf( outFile, "CPU time used = %.6f seconds\n", CPUTime ); fprintf( outFile, "Real time used = %.6f seconds\n", RealTime ); fprintf( outFile, "%.9f Billion(10^9) Updates per second [GUP/s]\n", *GUPs ); fprintf( outFile, "%.9f Billion(10^9) Updates/PE per second [GUP/s]\n", *GUPs / NumProcs ); /* No longer reporting per CPU number */ /* *GUPs /= NumProcs; */ } /* distribute result to all nodes */ MPI_Bcast( GUPs, 1, MPI_INT, 0, MPI_COMM_WORLD ); /* Verification phase */ /* Begin timing here */ CPUTime = -CPUSEC(); RealTime = -RTSEC(); if (PowerofTwo) { HPCC_Power2NodesMPIRandomAccessCheck(logTableSize, TableSize, LocalTableSize, GlobalStartMyProc, logNumProcs, NumProcs, MyProc, ProcNumUpdates, INT64_DT, &NumErrors); } else { HPCC_AnyNodesMPIRandomAccessCheck(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, ProcNumUpdates, INT64_DT, &NumErrors); } #ifdef LONG_IS_64BITS MPI_Reduce( &NumErrors, &GlbNumErrors, 1, 
MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); #else /* MPI 1.1 standard (obsolete at this point) doesn't define MPI_SUM to work on `long long': http://www.mpi-forum.org/docs/mpi-11-html/node78.html and therefore LAM 6.5.6 chooses not to implement it (even though there is code for it in LAM and for other reductions work OK, e.g. MPI_MAX). MPICH 1.2.5 doesn't complain about MPI_SUM but it doesn't have MPI_UNSIGNED_LONG_LONG (but has MPI_LONG_LONG_INT): http://www.mpi-forum.org/docs/mpi-20-html/node84.htm So I need to create a trivial summation operation. */ MPI_Op_create( Sum64, 1, &sum64 ); MPI_Reduce( &NumErrors, &GlbNumErrors, 1, INT64_DT, sum64, 0, MPI_COMM_WORLD ); MPI_Op_free( &sum64 ); #endif /* End timed section */ CPUTime += CPUSEC(); RealTime += RTSEC(); if(MyProc == 0){ params->MPIRandomAccess_CheckTime = RealTime; fprintf( outFile, "Verification: CPU time used = %.6f seconds\n", CPUTime); fprintf( outFile, "Verification: Real time used = %.6f seconds\n", RealTime); fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n", GlbNumErrors, TableSize, (GlbNumErrors <= 0.01*TableSize) ? "passed" : "failed"); if (GlbNumErrors > 0.01*TableSize) params->Failure = 1; params->MPIRandomAccess_Errors = (s64Int)GlbNumErrors; params->MPIRandomAccess_ErrorsFraction = (double)GlbNumErrors / (double)TableSize; params->MPIRandomAccess_Algorithm = HPCC_RA_ALGORITHM; } /* End verification phase */ /* Deallocate memory (in reverse order of allocation which should help fragmentation) */ HPCC_free( HPCC_Table ); free( finish_req ); free( finish_statuses ); failed_table: if (0 == MyProc) if (outFile != stderr) fclose( outFile ); MPI_Barrier( MPI_COMM_WORLD ); return 0; } hpcc-1.4.1/RandomAccess/MPIRandomAccessLCG.c0000644000000000000000000010265611403763471015305 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. 
* * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK is limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer. 
A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * are permitted. * * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors. 
In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. * * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications possible because processor location and local * offset can be determined by applying masks to the input stream values. * The non power of two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark, and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison). 
* * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size. */ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" #ifndef LONG_IS_64BITS static void Sum64(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype) { int i, n = *len; s64Int *invec64 = (s64Int *)invec, *inoutvec64 = (s64Int *)inoutvec; for (i = n; i; i--, invec64++, inoutvec64++) *inoutvec64 += *invec64; } #endif #ifdef HPCC_RA_STDALG void HPCC_AnyNodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { s64Int i, j; int proc_count; s64Int SendCnt; u64Int Ran; s64Int WhichPe; u64Int GlobalOffset, LocalOffset; int NumberReceiving = NumProcs - 1; #ifdef USE_MULTIPLE_RECV int index, NumRecvs; MPI_Request inreq[MAX_RECV] = { MPI_REQUEST_NULL }; MPI_Request outreq = MPI_REQUEST_NULL; #else MPI_Request inreq, outreq = MPI_REQUEST_NULL; #endif u64Int inmsg; int bufferBase; MPI_Status status; int have_done; int pe; int pendingUpdates; int maxPendingUpdates; int localBufferSize; int peUpdates; int recvUpdates; Bucket_Ptr Buckets; pendingUpdates = 0; maxPendingUpdates = MAX_TOTAL_PENDING_UPDATES; localBufferSize = LOCAL_BUFFER_SIZE; Buckets = HPCC_InitBuckets(NumProcs, maxPendingUpdates); /* Perform updates to main table. 
The scalar equivalent is: * * u64Int Ran; * Ran = 1; * for (i=0; i> (64 - LOG2_TABSIZE)] ^= Ran; * } */ SendCnt = ProcNumUpdates; /* SendCnt = (4 * LocalTableSize); */ Ran = HPCC_starts_LCG(4 * GlobalStartMyProc); i = 0; #ifdef USE_MULTIPLE_RECV NumRecvs = (NumProcs > 4) ? (Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg >> (64 - logTableSize)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = LCG_MUL64 * Ran + LCG_ADD64; GlobalOffset = Ran >> (64 - logTableSize); if ( GlobalOffset < Top) WhichPe = ( GlobalOffset / (MinLocalTableSize + 1) ); else WhichPe = ( (GlobalOffset - Remainder) / MinLocalTableSize ); if (WhichPe == MyProc) { LocalOffset = (Ran >> (64 - logTableSize)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send remaining updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg >> (64 - logTableSize)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg >> (64 - logTableSize)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } MPI_Waitall( NumProcs, finish_req, finish_statuses); /* Be nice and clean up after ourselves */ HPCC_FreeBuckets(Buckets, NumProcs); #ifdef USE_MULTIPLE_RECV for (j = 0; j < NumRecvs; j++) { MPI_Cancel(&inreq[j]); MPI_Wait(&inreq[j], MPI_STATUS_IGNORE); } #else MPI_Cancel(&inreq); MPI_Wait(&inreq, MPI_STATUS_IGNORE); #endif MPI_Wait(&outreq, MPI_STATUS_IGNORE); /* end multiprocessor code */ } void HPCC_Power2NodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { s64Int i, j; int proc_count; s64Int SendCnt; u64Int Ran; s64Int WhichPe; u64Int LocalOffset; int logLocalTableSize = logTableSize - logNumProcs; int NumberReceiving = NumProcs - 1; #ifdef USE_MULTIPLE_RECV int index, NumRecvs; MPI_Request inreq[MAX_RECV] = { MPI_REQUEST_NULL }; MPI_Request outreq = MPI_REQUEST_NULL; #else MPI_Request inreq, outreq = MPI_REQUEST_NULL; #endif u64Int inmsg; int bufferBase; MPI_Status status; int have_done; int pe; int pendingUpdates; int maxPendingUpdates; int localBufferSize; int peUpdates; int recvUpdates; Bucket_Ptr Buckets; pendingUpdates = 0; maxPendingUpdates = MAX_TOTAL_PENDING_UPDATES; localBufferSize = LOCAL_BUFFER_SIZE; Buckets = HPCC_InitBuckets(NumProcs, maxPendingUpdates); /* Perform updates to main table. 
The scalar equivalent is: * * u64Int Ran; * Ran = 1; * for (i=0; i> (64 - LOG2_TABSIZE)] ^= Ran; * } */ SendCnt = ProcNumUpdates; /* SendCnt = (4 * LocalTableSize); */ Ran = HPCC_starts_LCG(4 * GlobalStartMyProc); i = 0; #ifdef USE_MULTIPLE_RECV NumRecvs = (NumProcs > 4) ? (Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[(inmsg >> (64 - logTableSize)) & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = LCG_MUL64 * Ran + LCG_ADD64; WhichPe = (Ran >> (64 - logTableSize + logLocalTableSize)) & (NumProcs - 1); if (WhichPe == MyProc) { LocalOffset = (Ran >> (64 - logTableSize)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send remaining updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[(inmsg >> (64 - logTableSize)) & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[(inmsg >> (64 - logTableSize)) & (LocalTableSize-1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } MPI_Waitall( NumProcs, finish_req, finish_statuses); /* Be nice and clean up after ourselves */ HPCC_FreeBuckets(Buckets, NumProcs); #ifdef USE_MULTIPLE_RECV for (j = 0; j < NumRecvs; j++) { MPI_Cancel(&inreq[j]); MPI_Wait(&inreq[j], MPI_STATUS_IGNORE); } #else MPI_Cancel(&inreq); MPI_Wait(&inreq, MPI_STATUS_IGNORE); #endif MPI_Wait(&outreq, MPI_STATUS_IGNORE); /* end multiprocessor code */ } #endif int HPCC_MPIRandomAccess_LCG(HPCC_Params *params) { s64Int i; s64Int NumErrors, GlbNumErrors; int NumProcs, logNumProcs, MyProc; u64Int GlobalStartMyProc; int Remainder; /* Number of processors with (LocalTableSize + 1) entries */ u64Int Top; /* Number of table entries in top of Table */ s64Int LocalTableSize; /* Local table width */ u64Int MinLocalTableSize; /* Integer ratio TableSize/NumProcs */ u64Int logTableSize, TableSize; double CPUTime; /* CPU time to update table */ double RealTime; /* Real time to update table */ double TotalMem; int sAbort, rAbort; int PowerofTwo; double timeBound = -1; /* OPTIONAL time bound for execution time */ u64Int NumUpdates_Default; /* Number of updates to table (suggested: 4x number of table entries) */ u64Int NumUpdates; /* actual number of updates to table - may be smaller than * NumUpdates_Default due to execution time bounds */ s64Int ProcNumUpdates; /* number of updates per processor */ #ifdef RA_TIME_BOUND s64Int GlbNumUpdates; /* for reduction */ #endif FILE *outFile = NULL; MPI_Op sum64; double *GUPs; MPI_Datatype INT64_DT; MPI_Status *finish_statuses; MPI_Request *finish_req; #ifdef LONG_IS_64BITS INT64_DT = MPI_LONG; #else INT64_DT = MPI_LONG_LONG_INT; #endif GUPs = 
¶ms->MPIRandomAccess_LCG_GUPs; MPI_Comm_size( MPI_COMM_WORLD, &NumProcs ); MPI_Comm_rank( MPI_COMM_WORLD, &MyProc ); if (0 == MyProc) { outFile = fopen( params->outFname, "a" ); if (! outFile) outFile = stderr; } TotalMem = params->HPLMaxProcMem; /* max single node memory */ TotalMem *= NumProcs; /* max memory in NumProcs nodes */ TotalMem /= sizeof(u64Int); /* calculate TableSize --- the size of update array (must be a power of 2) */ for (TotalMem *= 0.5, logTableSize = 0, TableSize = 1; TotalMem >= 1.0; TotalMem *= 0.5, logTableSize++, TableSize <<= 1) ; /* EMPTY */ /* determine whether the number of processors is a power of 2 */ for (i = 1, logNumProcs = 0; ; logNumProcs++, i <<= 1) { if (i == NumProcs) { PowerofTwo = HPCC_TRUE; Remainder = 0; Top = 0; MinLocalTableSize = (TableSize / NumProcs); LocalTableSize = MinLocalTableSize; GlobalStartMyProc = (MinLocalTableSize * MyProc); break; /* number of processes is not a power 2 (too many shifts may introduce negative values or 0) */ } else if (i > NumProcs || i <= 0) { PowerofTwo = HPCC_FALSE; /* Minimum local table size --- some processors have an additional entry */ MinLocalTableSize = (TableSize / NumProcs); /* Number of processors with (LocalTableSize + 1) entries */ Remainder = TableSize - (MinLocalTableSize * NumProcs); /* Number of table entries in top of Table */ Top = (MinLocalTableSize + 1) * Remainder; /* Local table size */ if (MyProc < Remainder) { LocalTableSize = (MinLocalTableSize + 1); GlobalStartMyProc = ( (MinLocalTableSize + 1) * MyProc); } else { LocalTableSize = MinLocalTableSize; GlobalStartMyProc = ( (MinLocalTableSize * MyProc) + Remainder ); } break; } /* end else if */ } /* end for i */ sAbort = 0; finish_statuses = XMALLOC( MPI_Status, NumProcs ); finish_req = XMALLOC( MPI_Request, NumProcs ); HPCC_Table = HPCC_XMALLOC( u64Int, LocalTableSize ); if (! finish_statuses || ! finish_req || ! 
HPCC_Table) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(outFile, "Failed to allocate memory for the main table.\n"); /* check all allocations in case there are new added and their order changes */ if (finish_statuses) free( finish_statuses ); if (finish_req) free( finish_req ); if (HPCC_Table) HPCC_free( HPCC_Table ); goto failed_table; } params->MPIRandomAccess_LCG_N = (s64Int)TableSize; /* Default number of global updates to table: 4x number of table entries */ NumUpdates_Default = 4 * TableSize; ProcNumUpdates = 4*LocalTableSize; NumUpdates = NumUpdates_Default; /* The time bound is only accurate for standard RandomAccess algorithm. */ #ifdef HPCC_RA_STDALG #ifdef RA_TIME_BOUND /* estimate number of updates such that execution time does not exceed time bound */ /* time_bound should be a parameter */ /* max run time in seconds */ MPI_Allreduce( ¶ms->HPLrdata.time, &timeBound, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD ); timeBound = Mmax( 0.25 * timeBound, (double)TIME_BOUND ); if (PowerofTwo) { HPCC_Power2NodesTime(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, INT64_DT, timeBound, (u64Int *)&ProcNumUpdates, finish_statuses, finish_req); } else { HPCC_AnyNodesTime(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, INT64_DT, timeBound, (u64Int *)&ProcNumUpdates, finish_statuses, finish_req); } /* be conservative: get the smallest number of updates among all procs */ MPI_Reduce( &ProcNumUpdates, &GlbNumUpdates, 1, INT64_DT, MPI_MIN, 0, MPI_COMM_WORLD ); /* distribute number of updates per proc to all procs */ MPI_Bcast( &GlbNumUpdates, 1, INT64_DT, 0, MPI_COMM_WORLD ); ProcNumUpdates = Mmin(GlbNumUpdates, (4*LocalTableSize)); /* works for both PowerofTwo and AnyNodes */ NumUpdates = Mmin((ProcNumUpdates*NumProcs), 
(s64Int)NumUpdates_Default); #endif #endif if (MyProc == 0) { fprintf( outFile, "Running on %d processors%s\n", NumProcs, PowerofTwo ? " (PowerofTwo)" : ""); fprintf( outFile, "Total Main table size = 2^" FSTR64 " = " FSTR64 " words\n", logTableSize, TableSize ); if (PowerofTwo) fprintf( outFile, "PE Main table size = 2^" FSTR64 " = " FSTR64 " words/PE\n", (logTableSize - logNumProcs), TableSize/NumProcs ); else fprintf( outFile, "PE Main table size = (2^" FSTR64 ")/%d = " FSTR64 " words/PE MAX\n", logTableSize, NumProcs, LocalTableSize); fprintf( outFile, "Default number of updates (RECOMMENDED) = " FSTR64 "\n", NumUpdates_Default); #ifdef RA_TIME_BOUND fprintf( outFile, "Number of updates EXECUTED = " FSTR64 " (for a TIME BOUND of %.2f secs)\n", NumUpdates, timeBound); #endif params->MPIRandomAccess_LCG_ExeUpdates = NumUpdates; params->MPIRandomAccess_LCG_TimeBound = timeBound; } /* Initialize main table */ for (i=0; iMPIRandomAccess_LCG_time = RealTime; *GUPs = 1e-9*NumUpdates / RealTime; fprintf( outFile, "CPU time used = %.6f seconds\n", CPUTime ); fprintf( outFile, "Real time used = %.6f seconds\n", RealTime ); fprintf( outFile, "%.9f Billion(10^9) Updates per second [GUP/s]\n", *GUPs ); fprintf( outFile, "%.9f Billion(10^9) Updates/PE per second [GUP/s]\n", *GUPs / NumProcs ); /* No longer reporting per CPU number */ /* *GUPs /= NumProcs; */ } /* distribute result to all nodes */ MPI_Bcast( GUPs, 1, MPI_INT, 0, MPI_COMM_WORLD ); /* Verification phase */ /* Begin timing here */ CPUTime = -CPUSEC(); RealTime = -RTSEC(); if (PowerofTwo) { HPCC_Power2NodesMPIRandomAccessCheck_LCG(logTableSize, TableSize, LocalTableSize, GlobalStartMyProc, logNumProcs, NumProcs, MyProc, ProcNumUpdates, INT64_DT, &NumErrors); } else { HPCC_AnyNodesMPIRandomAccessCheck_LCG(logTableSize, TableSize, LocalTableSize, MinLocalTableSize, GlobalStartMyProc, Top, logNumProcs, NumProcs, Remainder, MyProc, ProcNumUpdates, INT64_DT, &NumErrors); } #ifdef LONG_IS_64BITS MPI_Reduce( &NumErrors, 
&GlbNumErrors, 1, MPI_LONG, MPI_SUM, 0, MPI_COMM_WORLD ); #else /* MPI 1.1 standard (obsolete at this point) doesn't define MPI_SUM to work on `long long': http://www.mpi-forum.org/docs/mpi-11-html/node78.html and therefore LAM 6.5.6 chooses not to implement it (even though there is code for it in LAM and for other reductions work OK, e.g. MPI_MAX). MPICH 1.2.5 doesn't complain about MPI_SUM but it doesn't have MPI_UNSIGNED_LONG_LONG (but has MPI_LONG_LONG_INT): http://www.mpi-forum.org/docs/mpi-20-html/node84.htm So I need to create a trivial summation operation. */ MPI_Op_create( Sum64, 1, &sum64 ); MPI_Reduce( &NumErrors, &GlbNumErrors, 1, INT64_DT, sum64, 0, MPI_COMM_WORLD ); MPI_Op_free( &sum64 ); #endif /* End timed section */ CPUTime += CPUSEC(); RealTime += RTSEC(); if(MyProc == 0){ params->MPIRandomAccess_LCG_CheckTime = RealTime; fprintf( outFile, "Verification: CPU time used = %.6f seconds\n", CPUTime); fprintf( outFile, "Verification: Real time used = %.6f seconds\n", RealTime); fprintf( outFile, "Found " FSTR64 " errors in " FSTR64 " locations (%s).\n", GlbNumErrors, TableSize, (GlbNumErrors <= 0.01*TableSize) ? "passed" : "failed"); if (GlbNumErrors > 0.01*TableSize) params->Failure = 1; params->MPIRandomAccess_LCG_Errors = (s64Int)GlbNumErrors; params->MPIRandomAccess_LCG_ErrorsFraction = (double)GlbNumErrors / (double)TableSize; params->MPIRandomAccess_LCG_Algorithm = HPCC_RA_ALGORITHM; } /* End verification phase */ /* Deallocate memory (in reverse order of allocation which should help fragmentation) */ HPCC_free( HPCC_Table ); free( finish_req ); free( finish_statuses ); failed_table: if (0 == MyProc) if (outFile != stderr) fclose( outFile ); MPI_Barrier( MPI_COMM_WORLD ); return 0; } hpcc-1.4.1/RandomAccess/MPIRandomAccessLCG_opt.c0000644000000000000000000004475411403763471016173 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. 
Contact * David Koester or Bob Lucas * if you have questions. * * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK is limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer.
A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * is permitted. * * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors.
In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. * * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications possible because processor location and local * offset can be determined by applying masks to the input stream values. * The non power of two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark, and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison).
* * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size. */ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" #define CHUNK MAX_TOTAL_PENDING_UPDATES #define CHUNKBIG (32*CHUNK) #define RCHUNK (16384) #define PITER 8 #define MAXLOGPROCS 20 #ifdef RA_SANDIA_OPT2 void HPCC_AnyNodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int ipartner,iterate,niterate,npartition,proclo,nlower,nupper,procmid; int ndata,nkeep,nsend,nrecv,index, nfrac; u64Int ran,datum,nglobalm1,indexmid; u64Int *data,*send, *offsets; MPI_Status status; /* setup: should not really be part of this timed routine NOTE: niterate must be computed from global TableSize * 4 not from ProcNumUpdates since that can be different on each proc round niterate up by 1 to do slightly more than required updates */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts_LCG(4*GlobalStartMyProc); offsets = (u64Int *) malloc((NumProcs+1)*sizeof(u64Int)); MPI_Allgather(&GlobalStartMyProc,1,INT64_DT,offsets,1,INT64_DT, MPI_COMM_WORLD); offsets[NumProcs] = TableSize; niterate = 4 * TableSize / NumProcs / CHUNK + 1; nglobalm1 = 64 - logTableSize; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = LCG_MUL64 * ran + LCG_ADD64; 
data[i] = ran; } ndata = CHUNK; npartition = NumProcs; proclo = 0; while (npartition > 1) { nlower = npartition/2; nupper = npartition - nlower; procmid = proclo + nlower; indexmid = offsets[procmid]; nkeep = nsend = 0; if (MyProc < procmid) { for (i = 0; i < ndata; i++) { if ((data[i] >> nglobalm1) >= indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if ((data[i] >> nglobalm1) < indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } if (nlower == nupper) { if (MyProc < procmid) ipartner = MyProc + nlower; else ipartner = MyProc - nlower; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc < procmid) { nfrac = (nlower - (MyProc-proclo)) * nsend / nupper; ipartner = MyProc + nlower; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner+1,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner+1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else if (MyProc > procmid && MyProc < procmid+nlower) { nfrac = (MyProc - procmid) * nsend / nlower; ipartner = MyProc - nlower; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner-1,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner-1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc == procmid) ipartner = MyProc - nlower; else ipartner = MyProc - nupper; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); 
MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } } if (MyProc < procmid) npartition = nlower; else { proclo = procmid; npartition = nupper; } } for (i = 0; i < ndata; i++) { datum = data[i]; index = (datum >> nglobalm1) - GlobalStartMyProc; HPCC_Table[index] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); free(offsets); } /* This sort is manually unrolled to make sure the compiler can see * the parallelism -KDU */ static void sort_data(u64Int *source, u64Int *nomatch, u64Int *match, int number, int *nnomatch, int *nmatch, int mask_shift) { int i,dindex,myselect[8],counts[2]; int div_num = number / 8; int loop_total = div_num * 8; u64Int procmask = ((u64Int) 1) << mask_shift; u64Int *buffers[2]; buffers[0] = nomatch; counts[0] = *nnomatch; buffers[1] = match; counts[1] = *nmatch; for (i = 0; i < div_num; i++) { dindex = i*8; myselect[0] = (source[dindex] & procmask) >> mask_shift; myselect[1] = (source[dindex+1] & procmask) >> mask_shift; myselect[2] = (source[dindex+2] & procmask) >> mask_shift; myselect[3] = (source[dindex+3] & procmask) >> mask_shift; myselect[4] = (source[dindex+4] & procmask) >> mask_shift; myselect[5] = (source[dindex+5] & procmask) >> mask_shift; myselect[6] = (source[dindex+6] & procmask) >> mask_shift; myselect[7] = (source[dindex+7] & procmask) >> mask_shift; buffers[myselect[0]][counts[myselect[0]]++] = source[dindex]; buffers[myselect[1]][counts[myselect[1]]++] = source[dindex+1]; buffers[myselect[2]][counts[myselect[2]]++] = source[dindex+2]; buffers[myselect[3]][counts[myselect[3]]++] = source[dindex+3]; buffers[myselect[4]][counts[myselect[4]]++] = source[dindex+4]; buffers[myselect[5]][counts[myselect[5]]++] = source[dindex+5]; buffers[myselect[6]][counts[myselect[6]]++] = source[dindex+6]; buffers[myselect[7]][counts[myselect[7]]++] = source[dindex+7]; } for (i = loop_total; i < number; i++) { u64Int mydata = source[i]; if (mydata & procmask) 
buffers[1][counts[1]++] = mydata; else buffers[0][counts[0]++] = mydata; } *nnomatch = counts[0]; *nmatch = counts[1]; } /* Manual unrolling is a significant win if -Msafeptr is used -KDU */ static void update_table(u64Int *data, u64Int *table, int number, int nglobalm1, int nlocalm1) { int i,dindex,index; int div_num = number / 8; int loop_total = div_num * 8; u64Int index0,index1,index2,index3,index4,index5,index6,index7; u64Int ltable0,ltable1,ltable2,ltable3,ltable4,ltable5,ltable6,ltable7; for (i = 0; i < div_num; i++) { dindex = i*8; index0 = (data[dindex] >> nglobalm1) & nlocalm1; index1 = (data[dindex+1] >> nglobalm1) & nlocalm1; index2 = (data[dindex+2] >> nglobalm1) & nlocalm1; index3 = (data[dindex+3] >> nglobalm1) & nlocalm1; index4 = (data[dindex+4] >> nglobalm1) & nlocalm1; index5 = (data[dindex+5] >> nglobalm1) & nlocalm1; index6 = (data[dindex+6] >> nglobalm1) & nlocalm1; index7 = (data[dindex+7] >> nglobalm1) & nlocalm1; ltable0 = table[index0]; ltable1 = table[index1]; ltable2 = table[index2]; ltable3 = table[index3]; ltable4 = table[index4]; ltable5 = table[index5]; ltable6 = table[index6]; ltable7 = table[index7]; table[index0] = ltable0 ^ data[dindex]; table[index1] = ltable1 ^ data[dindex+1]; table[index2] = ltable2 ^ data[dindex+2]; table[index3] = ltable3 ^ data[dindex+3]; table[index4] = ltable4 ^ data[dindex+4]; table[index5] = ltable5 ^ data[dindex+5]; table[index6] = ltable6 ^ data[dindex+6]; table[index7] = ltable7 ^ data[dindex+7]; } for (i = loop_total; i < number; i++) { u64Int datum = data[i]; index = (datum >> nglobalm1) & nlocalm1; table[index] ^= datum; } } void HPCC_Power2NodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j,k; int 
logTableLocal,ipartner,iterate,niterate,iter_mod; int ndata,nkeep,nsend,nrecv, nglobalm1, nlocalm1, nkept; u64Int ran,datum; u64Int *data,*send,*send1,*send2; u64Int *recv[PITER][MAXLOGPROCS]; MPI_Status status; MPI_Request request[PITER][MAXLOGPROCS]; MPI_Request srequest; /* setup: should not really be part of this timed routine */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send1 = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send2 = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = send1; for (j = 0; j < PITER; j++) for (i = 0; i < logNumProcs; i++) recv[j][i] = (u64Int *) malloc(sizeof(u64Int)*RCHUNK); ran = HPCC_starts_LCG(4*GlobalStartMyProc); niterate = ProcNumUpdates / CHUNK; logTableLocal = logTableSize - logNumProcs; nglobalm1 = 64 - logTableSize; nlocalm1 = LocalTableSize - 1; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { iter_mod = iterate % PITER; for (i = 0; i < CHUNK; i++) { ran = LCG_MUL64 * ran + LCG_ADD64; data[i] = ran; } nkept = CHUNK; nrecv = 0; if (iter_mod == 0) for (k = 0; k < PITER; k++) for (j = 0; j < logNumProcs; j++) { ipartner = (1 << j) ^ MyProc; MPI_Irecv(recv[k][j],RCHUNK,INT64_DT,ipartner,0,MPI_COMM_WORLD, &request[k][j]); } for (j = 0; j < logNumProcs; j++) { nkeep = nsend = 0; send = (send == send1) ? 
send2 : send1; ipartner = (1 << j) ^ MyProc; if (ipartner > MyProc) { sort_data(data,data,send,nkept,&nkeep,&nsend,nglobalm1 + logTableLocal+j); if (j > 0) { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); sort_data(recv[iter_mod][j-1],data,send,nrecv,&nkeep, &nsend,nglobalm1 + logTableLocal+j); } } else { sort_data(data,send,data,nkept,&nsend,&nkeep,nglobalm1 + logTableLocal+j); if (j > 0) { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); sort_data(recv[iter_mod][j-1],send,data,nrecv,&nsend, &nkeep,nglobalm1 + logTableLocal+j); } } if (j > 0) MPI_Wait(&srequest,&status); MPI_Isend(send,nsend,INT64_DT,ipartner,0,MPI_COMM_WORLD,&srequest); if (j == (logNumProcs - 1)) update_table(data,HPCC_Table,nkeep,nglobalm1, nlocalm1); nkept = nkeep; } if (logNumProcs == 0) update_table(data,HPCC_Table,nkept,nglobalm1, nlocalm1); else { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); update_table(recv[iter_mod][j-1],HPCC_Table,nrecv,nglobalm1, nlocalm1); MPI_Wait(&srequest,&status); } ndata = nkept + nrecv; } /* clean up: should not really be part of this timed routine */ for (j = 0; j < PITER; j++) for (i = 0; i < logNumProcs; i++) free(recv[j][i]); free(data); free(send1); free(send2); } #endif hpcc-1.4.1/RandomAccess/MPIRandomAccessLCG_vanilla.c0000644000000000000000000003340511403763471017006 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. * * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. 
In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK is limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer. A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * are permitted.
* * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors. In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. 
In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. * * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications possible because processor location and local * offset can be determined by applying masks to the input stream values. * The non power of two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark, and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison). * * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size.
*/ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" #define CHUNK MAX_TOTAL_PENDING_UPDATES #define CHUNKBIG (32*CHUNK) #ifdef RA_SANDIA_NOPT void HPCC_AnyNodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int ipartner,iterate,niterate,npartition,proclo,nlower,nupper,procmid; int ndata,nkeep,nsend,nrecv,index, nfrac; u64Int ran,datum,nglobalm1,indexmid; u64Int *data,*send, *offsets; MPI_Status status; /* setup: should not really be part of this timed routine NOTE: niterate must be computed from global TableSize * 4 not from ProcNumUpdates since that can be different on each proc round niterate up by 1 to do slightly more than required updates */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts_LCG(4*GlobalStartMyProc); offsets = (u64Int *) malloc((NumProcs+1)*sizeof(u64Int)); MPI_Allgather(&GlobalStartMyProc,1,INT64_DT,offsets,1,INT64_DT, MPI_COMM_WORLD); offsets[NumProcs] = TableSize; niterate = 4 * TableSize / NumProcs / CHUNK + 1; nglobalm1 = 64 - logTableSize; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = LCG_MUL64 * ran + LCG_ADD64; data[i] = ran; } ndata = CHUNK; npartition = NumProcs; proclo = 0; while (npartition > 1) { nlower = npartition/2; nupper = npartition - nlower; procmid = proclo + nlower; indexmid = offsets[procmid]; nkeep = nsend = 0; if (MyProc < procmid) { for (i = 0; i < ndata; i++) { if ((data[i] >> nglobalm1) >= indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if ((data[i] >> 
nglobalm1) < indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } if (nlower == nupper) { if (MyProc < procmid) ipartner = MyProc + nlower; else ipartner = MyProc - nlower; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc < procmid) { nfrac = (nlower - (MyProc-proclo)) * nsend / nupper; ipartner = MyProc + nlower; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner+1,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner+1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else if (MyProc > procmid && MyProc < procmid+nlower) { nfrac = (MyProc - procmid) * nsend / nlower; ipartner = MyProc - nlower; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner-1,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner-1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc == procmid) ipartner = MyProc - nlower; else ipartner = MyProc - nupper; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } } if (MyProc < procmid) npartition = nlower; else { proclo = procmid; npartition = nupper; } } for (i = 0; i < ndata; i++) { datum = data[i]; index = (datum >> nglobalm1) - GlobalStartMyProc; HPCC_Table[index] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); free(offsets); } void HPCC_Power2NodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, 
u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int logTableLocal,ipartner,iterate,niterate; int ndata,nkeep,nsend,nrecv,index, nglobalm1, nlocalm1; u64Int ran,datum,procmask; u64Int *data,*send; MPI_Status status; /* setup: should not really be part of this timed routine */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts_LCG(4*GlobalStartMyProc); niterate = ProcNumUpdates / CHUNK; logTableLocal = logTableSize - logNumProcs; nlocalm1 = LocalTableSize - 1; nglobalm1 = 64 - logTableSize; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = LCG_MUL64 * ran + LCG_ADD64; data[i] = ran; } ndata = CHUNK; for (j = 0; j < logNumProcs; j++) { nkeep = nsend = 0; ipartner = (1 << j) ^ MyProc; procmask = ((u64Int) 1) << (nglobalm1 + logTableLocal + j); if (ipartner > MyProc) { for (i = 0; i < ndata; i++) { if (data[i] & procmask) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if (data[i] & procmask) data[nkeep++] = data[i]; else send[nsend++] = data[i]; } } MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } for (i = 0; i < ndata; i++) { datum = data[i]; index = datum >> nglobalm1; HPCC_Table[index & nlocalm1] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); } #endif hpcc-1.4.1/RandomAccess/MPIRandomAccess_opt.c0000644000000000000000000004445411403763471015642 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been 
contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. * * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK is limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer.
A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * are permitted. * * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors. 
In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. * * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications possible because processor location and local * offset can be determined by applying masks to the input stream values. * The non power of two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark, and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison).
* * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size. */ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" #define CHUNK MAX_TOTAL_PENDING_UPDATES #define CHUNKBIG (32*CHUNK) #define RCHUNK (16384) #define PITER 8 #define MAXLOGPROCS 20 #ifdef RA_SANDIA_OPT2 void AnyNodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int ipartner,iterate,niterate,npartition,proclo,nlower,nupper,procmid; int ndata,nkeep,nsend,nrecv,index, nfrac; u64Int ran,datum,nglobalm1,indexmid; u64Int *data,*send, *offsets; MPI_Status status; /* setup: should not really be part of this timed routine NOTE: niterate must be computed from global TableSize * 4 not from ProcNumUpdates since that can be different on each proc round niterate up by 1 to do slightly more than required updates */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts(4*GlobalStartMyProc); offsets = (u64Int *) malloc((NumProcs+1)*sizeof(u64Int)); MPI_Allgather(&GlobalStartMyProc,1,INT64_DT,offsets,1,INT64_DT, MPI_COMM_WORLD); offsets[NumProcs] = TableSize; niterate = 4 * TableSize / NumProcs / CHUNK + 1; nglobalm1 = TableSize - 1; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = (ran << 1) ^ ((s64Int) ran < ZERO64B ? 
POLY : ZERO64B); data[i] = ran; } ndata = CHUNK; npartition = NumProcs; proclo = 0; while (npartition > 1) { nlower = npartition/2; nupper = npartition - nlower; procmid = proclo + nlower; indexmid = offsets[procmid]; nkeep = nsend = 0; if (MyProc < procmid) { for (i = 0; i < ndata; i++) { if ((data[i] & nglobalm1) >= indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if ((data[i] & nglobalm1) < indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } if (nlower == nupper) { if (MyProc < procmid) ipartner = MyProc + nlower; else ipartner = MyProc - nlower; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc < procmid) { nfrac = (nlower - (MyProc-proclo)) * nsend / nupper; ipartner = MyProc + nlower; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner+1,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner+1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else if (MyProc > procmid && MyProc < procmid+nlower) { nfrac = (MyProc - procmid) * nsend / nlower; ipartner = MyProc - nlower; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner-1,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner-1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc == procmid) ipartner = MyProc - nlower; else ipartner = MyProc - nupper; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); 
MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } } if (MyProc < procmid) npartition = nlower; else { proclo = procmid; npartition = nupper; } } for (i = 0; i < ndata; i++) { datum = data[i]; index = (datum & nglobalm1) - GlobalStartMyProc; HPCC_Table[index] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); free(offsets); } /* This sort is manually unrolled to make sure the compiler can see * the parallelism -KDU */ static void sort_data(u64Int *source, u64Int *nomatch, u64Int *match, int number, int *nnomatch, int *nmatch, int mask_shift) { int i,dindex,myselect[8],counts[2]; int div_num = number / 8; int loop_total = div_num * 8; u64Int procmask = ((u64Int) 1) << mask_shift; u64Int *buffers[2]; buffers[0] = nomatch; counts[0] = *nnomatch; buffers[1] = match; counts[1] = *nmatch; for (i = 0; i < div_num; i++) { dindex = i*8; myselect[0] = (source[dindex] & procmask) >> mask_shift; myselect[1] = (source[dindex+1] & procmask) >> mask_shift; myselect[2] = (source[dindex+2] & procmask) >> mask_shift; myselect[3] = (source[dindex+3] & procmask) >> mask_shift; myselect[4] = (source[dindex+4] & procmask) >> mask_shift; myselect[5] = (source[dindex+5] & procmask) >> mask_shift; myselect[6] = (source[dindex+6] & procmask) >> mask_shift; myselect[7] = (source[dindex+7] & procmask) >> mask_shift; buffers[myselect[0]][counts[myselect[0]]++] = source[dindex]; buffers[myselect[1]][counts[myselect[1]]++] = source[dindex+1]; buffers[myselect[2]][counts[myselect[2]]++] = source[dindex+2]; buffers[myselect[3]][counts[myselect[3]]++] = source[dindex+3]; buffers[myselect[4]][counts[myselect[4]]++] = source[dindex+4]; buffers[myselect[5]][counts[myselect[5]]++] = source[dindex+5]; buffers[myselect[6]][counts[myselect[6]]++] = source[dindex+6]; buffers[myselect[7]][counts[myselect[7]]++] = source[dindex+7]; } for (i = loop_total; i < number; i++) { u64Int mydata = source[i]; if (mydata & procmask) 
buffers[1][counts[1]++] = mydata; else buffers[0][counts[0]++] = mydata; } *nnomatch = counts[0]; *nmatch = counts[1]; } /* Manual unrolling is a significant win if -Msafeptr is used -KDU */ static void update_table(u64Int *data, u64Int *table, int number, int nlocalm1) { int i,dindex,index; int div_num = number / 8; int loop_total = div_num * 8; u64Int index0,index1,index2,index3,index4,index5,index6,index7; u64Int ltable0,ltable1,ltable2,ltable3,ltable4,ltable5,ltable6,ltable7; for (i = 0; i < div_num; i++) { dindex = i*8; index0 = data[dindex] & nlocalm1; index1 = data[dindex+1] & nlocalm1; index2 = data[dindex+2] & nlocalm1; index3 = data[dindex+3] & nlocalm1; index4 = data[dindex+4] & nlocalm1; index5 = data[dindex+5] & nlocalm1; index6 = data[dindex+6] & nlocalm1; index7 = data[dindex+7] & nlocalm1; ltable0 = table[index0]; ltable1 = table[index1]; ltable2 = table[index2]; ltable3 = table[index3]; ltable4 = table[index4]; ltable5 = table[index5]; ltable6 = table[index6]; ltable7 = table[index7]; table[index0] = ltable0 ^ data[dindex]; table[index1] = ltable1 ^ data[dindex+1]; table[index2] = ltable2 ^ data[dindex+2]; table[index3] = ltable3 ^ data[dindex+3]; table[index4] = ltable4 ^ data[dindex+4]; table[index5] = ltable5 ^ data[dindex+5]; table[index6] = ltable6 ^ data[dindex+6]; table[index7] = ltable7 ^ data[dindex+7]; } for (i = loop_total; i < number; i++) { u64Int datum = data[i]; index = datum & nlocalm1; table[index] ^= datum; } } void Power2NodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j,k; int logTableLocal,ipartner,iterate,niterate,iter_mod; int ndata,nkeep,nsend,nrecv,nlocalm1, nkept; u64Int ran,datum,procmask; u64Int *data,*send,*send1,*send2; u64Int *recv[PITER][MAXLOGPROCS]; 
MPI_Status status; MPI_Request request[PITER][MAXLOGPROCS]; MPI_Request srequest; /* setup: should not really be part of this timed routine */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send1 = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send2 = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = send1; for (j = 0; j < PITER; j++) for (i = 0; i < logNumProcs; i++) recv[j][i] = (u64Int *) malloc(sizeof(u64Int)*RCHUNK); ran = HPCC_starts(4*GlobalStartMyProc); niterate = ProcNumUpdates / CHUNK; logTableLocal = logTableSize - logNumProcs; nlocalm1 = LocalTableSize - 1; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { iter_mod = iterate % PITER; for (i = 0; i < CHUNK; i++) { ran = (ran << 1) ^ ((s64Int) ran < ZERO64B ? POLY : ZERO64B); data[i] = ran; } nkept = CHUNK; nrecv = 0; if (iter_mod == 0) for (k = 0; k < PITER; k++) for (j = 0; j < logNumProcs; j++) { ipartner = (1 << j) ^ MyProc; MPI_Irecv(recv[k][j],RCHUNK,INT64_DT,ipartner,0,MPI_COMM_WORLD, &request[k][j]); } for (j = 0; j < logNumProcs; j++) { nkeep = nsend = 0; send = (send == send1) ? 
send2 : send1; ipartner = (1 << j) ^ MyProc; procmask = ((u64Int) 1) << (logTableLocal + j); if (ipartner > MyProc) { sort_data(data,data,send,nkept,&nkeep,&nsend,logTableLocal+j); if (j > 0) { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); sort_data(recv[iter_mod][j-1],data,send,nrecv,&nkeep, &nsend,logTableLocal+j); } } else { sort_data(data,send,data,nkept,&nsend,&nkeep,logTableLocal+j); if (j > 0) { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); sort_data(recv[iter_mod][j-1],send,data,nrecv,&nsend, &nkeep,logTableLocal+j); } } if (j > 0) MPI_Wait(&srequest,&status); MPI_Isend(send,nsend,INT64_DT,ipartner,0,MPI_COMM_WORLD,&srequest); if (j == (logNumProcs - 1)) update_table(data,HPCC_Table,nkeep,nlocalm1); nkept = nkeep; } if (logNumProcs == 0) update_table(data,HPCC_Table,nkept,nlocalm1); else { MPI_Wait(&request[iter_mod][j-1],&status); MPI_Get_count(&status,INT64_DT,&nrecv); update_table(recv[iter_mod][j-1],HPCC_Table,nrecv,nlocalm1); MPI_Wait(&srequest,&status); } ndata = nkept + nrecv; } /* clean up: should not really be part of this timed routine */ for (j = 0; j < PITER; j++) for (i = 0; i < logNumProcs; i++) free(recv[j][i]); free(data); free(send1); free(send2); } #endif hpcc-1.4.1/RandomAccess/MPIRandomAccess_vanilla.c0000644000000000000000000003332411403763471016460 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. * * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. 
In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK is limited to both benchmarks attempting to calculate a peak system * capability. * * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor, the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * Select the memory size to be the power of two such that 2^n <= 1/2 of the * total memory. Each CPU operates on its own address stream, and the single * table may be distributed among nodes. The distribution of memory to nodes * is left to the implementer. A uniform data distribution may help balance * the workload, while non-uniform data distributions may simplify the * calculations that identify processor location by eliminating the requirement * for integer divides. A small (less than 1%) percentage of missed updates * are permitted.
* * When implementing a benchmark that measures GUPS on a distributed memory * multiprocessor system, it may be required to define constraints as to how * far in the random address stream each node is permitted to "look ahead". * Likewise, it may be required to define a constraint as to the number of * update messages that can be stored before processing to permit multi-level * parallelism for those systems that support such a paradigm. The limits on * "look ahead" and "stored updates" are being implemented to assure that the * benchmark meets the intent to profile memory architecture and not induce * significant artificial data locality. For the purpose of measuring GUPS, * we will stipulate that each process is permitted to look ahead no more than * 1024 random address stream samples with the same number of update messages * stored before processing. * * The supplied MPI-1 code generates the input stream {A} on all processors * and the global table has been distributed as uniformly as possible to * balance the workload and minimize any Amdahl fraction. This code does not * exploit "look-ahead". Addresses are sent to the appropriate processor * where the table entry resides as soon as each address is calculated. * Updates are performed as addresses are received. Each message is limited * to a single 64 bit long integer containing element ai from {A}. * Local offsets for T[ ] are extracted by the destination processor. * * If the number of processors is equal to a power of two, then the global * table can be distributed equally over the processors. In addition, the * processor number can be determined from that portion of the input stream * that identifies the address into the global table by masking off log2(p) * bits in the address. * * If the number of processors is not equal to a power of two, then the global * table cannot be equally distributed between processors. 
In the MPI-1 * implementation provided, there has been an attempt to minimize the differences * in workloads, and the largest difference in elements of T[ ] is one. The * number of values in the input stream generated by each processor will be * related to the number of global table entries on each processor. * * The MPI-1 version of RandomAccess treats the potential instance where the * number of processors is a power of two as a special case, because of the * significant simplifications that are possible when processor location and local * offset can be determined by applying masks to the input stream values. * The non-power-of-two case uses an integer division to determine the processor * location. The integer division will be more costly in terms of machine * cycles to perform than the bit masking operations. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark, and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* Jan 2005 * * This code has been modified to allow local bucket sorting of updates. * The total maximum number of updates in the local buckets of a process * is currently defined in "RandomAccess.h" as MAX_TOTAL_PENDING_UPDATES. * When the total maximum number of updates is reached, the process selects * the bucket (or destination process) with the largest number of * updates and sends out all the updates in that bucket. See buckets.c * for details about the buckets' implementation. * * This code also supports posting multiple MPI receive descriptors (based * on a contribution by David Addison). * * In addition, this implementation provides an option for limiting * the execution time of the benchmark to a specified time bound * (see time_bound.c). The time bound is currently defined in * time_bound.h, but it should be a benchmark parameter. By default * the benchmark will execute the recommended number of updates, * that is, four times the global table size.
*/ #include #include "RandomAccess.h" #include "buckets.h" #include "time_bound.h" #include "verification.h" #define CHUNK MAX_TOTAL_PENDING_UPDATES #define CHUNKBIG (32*CHUNK) #ifdef RA_SANDIA_NOPT void AnyNodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int ipartner,iterate,niterate,npartition,proclo,nlower,nupper,procmid; int ndata,nkeep,nsend,nrecv,index, nfrac; u64Int ran,datum,nglobalm1,indexmid; u64Int *data,*send, *offsets; MPI_Status status; /* setup: should not really be part of this timed routine NOTE: niterate must be computed from global TableSize * 4 not from ProcNumUpdates since that can be different on each proc round niterate up by 1 to do slightly more than required updates */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts(4*GlobalStartMyProc); offsets = (u64Int *) malloc((NumProcs+1)*sizeof(u64Int)); MPI_Allgather(&GlobalStartMyProc,1,INT64_DT,offsets,1,INT64_DT, MPI_COMM_WORLD); offsets[NumProcs] = TableSize; niterate = 4 * TableSize / NumProcs / CHUNK + 1; nglobalm1 = TableSize - 1; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = (ran << 1) ^ ((s64Int) ran < ZERO64B ? 
POLY : ZERO64B); data[i] = ran; } ndata = CHUNK; npartition = NumProcs; proclo = 0; while (npartition > 1) { nlower = npartition/2; nupper = npartition - nlower; procmid = proclo + nlower; indexmid = offsets[procmid]; nkeep = nsend = 0; if (MyProc < procmid) { for (i = 0; i < ndata; i++) { if ((data[i] & nglobalm1) >= indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if ((data[i] & nglobalm1) < indexmid) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } if (nlower == nupper) { if (MyProc < procmid) ipartner = MyProc + nlower; else ipartner = MyProc - nlower; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc < procmid) { nfrac = (nlower - (MyProc-proclo)) * nsend / nupper; ipartner = MyProc + nlower; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner+1,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner+1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else if (MyProc > procmid && MyProc < procmid+nlower) { nfrac = (MyProc - procmid) * nsend / nlower; ipartner = MyProc - nlower; MPI_Sendrecv(&send[nfrac],nsend-nfrac,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); nkeep += nrecv; MPI_Sendrecv(send,nfrac,INT64_DT,ipartner-1,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner-1,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } else { if (MyProc == procmid) ipartner = MyProc - nlower; else ipartner = MyProc - nupper; MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0,&data[nkeep], CHUNKBIG,INT64_DT,ipartner,0,MPI_COMM_WORLD,&status); 
MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } } if (MyProc < procmid) npartition = nlower; else { proclo = procmid; npartition = nupper; } } for (i = 0; i < ndata; i++) { datum = data[i]; index = (datum & nglobalm1) - GlobalStartMyProc; HPCC_Table[index] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); free(offsets); } void Power2NodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req) { int i,j; int logTableLocal,ipartner,iterate,niterate; int ndata,nkeep,nsend,nrecv,index,nlocalm1; u64Int ran,datum,procmask; u64Int *data,*send; MPI_Status status; /* setup: should not really be part of this timed routine */ data = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); send = (u64Int *) malloc(CHUNKBIG*sizeof(u64Int)); ran = HPCC_starts(4*GlobalStartMyProc); niterate = ProcNumUpdates / CHUNK; logTableLocal = logTableSize - logNumProcs; nlocalm1 = LocalTableSize - 1; /* actual update loop: this is only section that should be timed */ for (iterate = 0; iterate < niterate; iterate++) { for (i = 0; i < CHUNK; i++) { ran = (ran << 1) ^ ((s64Int) ran < ZERO64B ? 
POLY : ZERO64B); data[i] = ran; } ndata = CHUNK; for (j = 0; j < logNumProcs; j++) { nkeep = nsend = 0; ipartner = (1 << j) ^ MyProc; procmask = ((u64Int) 1) << (logTableLocal + j); if (ipartner > MyProc) { for (i = 0; i < ndata; i++) { if (data[i] & procmask) send[nsend++] = data[i]; else data[nkeep++] = data[i]; } } else { for (i = 0; i < ndata; i++) { if (data[i] & procmask) data[nkeep++] = data[i]; else send[nsend++] = data[i]; } } MPI_Sendrecv(send,nsend,INT64_DT,ipartner,0, &data[nkeep],CHUNKBIG,INT64_DT, ipartner,0,MPI_COMM_WORLD,&status); MPI_Get_count(&status,INT64_DT,&nrecv); ndata = nkeep + nrecv; } for (i = 0; i < ndata; i++) { datum = data[i]; index = datum & nlocalm1; HPCC_Table[index] ^= datum; } } /* clean up: should not really be part of this timed routine */ free(data); free(send); } #endif hpcc-1.4.1/RandomAccess/RandomAccess.h0000644000000000000000000001040311403763471014402 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* Random number generator */ #ifdef LONG_IS_64BITS #define POLY 0x0000000000000007UL #define PERIOD 1317624576693539401L #else #define POLY 0x0000000000000007ULL #define PERIOD 1317624576693539401LL #endif /* Macros for timing */ #define CPUSEC() (HPL_timer_cputime()) #define RTSEC() (MPI_Wtime()) extern u64Int HPCC_starts (s64Int); extern u64Int HPCC_starts_LCG (s64Int); #define WANT_MPI2_TEST 0 #define HPCC_TRUE 1 #define HPCC_FALSE 0 #define HPCC_DONE 0 #define FINISHED_TAG 1 #define UPDATE_TAG 2 #define USE_NONBLOCKING_SEND 1 #define MAX_TOTAL_PENDING_UPDATES 1024 #define LOCAL_BUFFER_SIZE MAX_TOTAL_PENDING_UPDATES #define USE_MULTIPLE_RECV 1 #ifdef USE_MULTIPLE_RECV #define MAX_RECV 16 #else #define MAX_RECV 1 #endif #define LCG_MUL64 6364136223846793005ULL #define LCG_ADD64 1 extern u64Int *HPCC_Table; extern u64Int LocalSendBuffer[LOCAL_BUFFER_SIZE]; extern u64Int LocalRecvBuffer[MAX_RECV*LOCAL_BUFFER_SIZE]; extern void AnyNodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int 
LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req); extern void Power2NodesMPIRandomAccessUpdate(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req); extern void HPCC_AnyNodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req); extern void HPCC_Power2NodesMPIRandomAccessUpdate_LCG(u64Int logTableSize, u64Int TableSize, s64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, s64Int ProcNumUpdates, MPI_Datatype INT64_DT, MPI_Status *finish_statuses, MPI_Request *finish_req); extern int HPCC_RandomAccess(HPCC_Params *params, int doIO, double *GUPs, int *failure); extern int HPCC_RandomAccess_LCG(HPCC_Params *params, int doIO, double *GUPs, int *failure); #if defined( RA_SANDIA_NOPT ) #define HPCC_RA_ALGORITHM 1 #elif defined( RA_SANDIA_OPT2 ) #define HPCC_RA_ALGORITHM 2 #else #define HPCC_RA_STDALG 1 #define HPCC_RA_ALGORITHM 0 #endif hpcc-1.4.1/RandomAccess/buckets.c0000644000000000000000000000606311256503657013506 00000000000000/* buckets.c * * Each process (PE) has a set of buckets, one for each possible * destination PE. 
Each set of buckets is implementated as an * array of objects, one for each destination PE, where each object * keeps the number of updates currently in the bucket and a pointer * to a list of updates. * The motivation for using lists (instead of fixed size buckets) * is to keep the memory requirements low as the number of processes * increase. To avoid the overheads of allocating memory dynamically, * a pool of memory is previously allocated and objetcs are * allocated/returned from/to this pool (see pool.c for details). * * An auxiliary data structure keeps the local buckets ordered * according to the number of updates of each bucket (see heap.c). * */ #include #include "RandomAccess.h" #include "buckets.h" #include "heap.h" #include "pool.h" /* memory pool for updates */ static POOL *Update_Pool; Bucket_Ptr HPCC_InitBuckets(int numPEs, int maxNumUpdates) { Bucket_Ptr Buckets; int i; Buckets = (Bucket_Ptr) malloc (numPEs * sizeof(Bucket_T)); for (i=0; ivalue = ran; update->forward = bucket->updateList; bucket->updateList = update; bucket->numUpdates++; numUpdates = bucket->numUpdates; if (numUpdates == 1) { /* this is the first update for this PE since last send */ HPCC_ra_Heap_Insert (pe, numUpdates); } else { /* PE already in heap, just increment number of updates */ HPCC_ra_Heap_IncrementKey(pe, numUpdates); } } int HPCC_GetUpdates(Bucket_Ptr Buckets, u64Int *bufferPtr, int bufferSize, int *peUpdates) { int pe; Bucket_Ptr bucket; Update_Ptr update, tmp; u64Int *buffer; HPCC_ra_Heap_ExtractMax (&pe, peUpdates); bucket = Buckets + pe; /* bucket = &(Buckets[pe]); */ /* copy updates to buffer */ update = bucket->updateList; buffer = bufferPtr; while (update != NULL_UPDATE_PTR) { *buffer = (u64Int)(update->value); buffer ++; tmp = update; update = update->forward; HPCC_PoolReturnObj(Update_Pool, tmp); } *peUpdates = bucket->numUpdates; bucket->numUpdates = 0; bucket->updateList = NULL_UPDATE_PTR; return(pe); } void HPCC_FreeBuckets (Bucket_Ptr Buckets, int numPEs) 
{ Update_Ptr ptr1, ptr2; int i; HPCC_ra_Heap_Free(); for (i = 0; i < numPEs; i ++) { ptr1 = Buckets[i].updateList; while (ptr1 != NULL_UPDATE_PTR) { ptr2 = ptr1; ptr1 = ptr1->forward; HPCC_PoolReturnObj(Update_Pool, ptr2); } } HPCC_PoolFree(Update_Pool); free(Update_Pool); free (Buckets); } hpcc-1.4.1/RandomAccess/buckets.h0000644000000000000000000000120511256503657013504 00000000000000 typedef struct update_s { char *poolNext; /* pointer for memory pool */ u64Int value; struct update_s *forward; } Update_T, *Update_Ptr; #define NULL_UPDATE_PTR ((Update_Ptr) NULL) typedef struct pe_bucket_s { int numUpdates; Update_Ptr updateList; } Bucket_T, *Bucket_Ptr; #define NULL_BUCKET_PTR ((Bucket_Ptr) NULL) extern Bucket_Ptr HPCC_InitBuckets(int numPEs, int maxNumUpdates); extern void HPCC_FreeBuckets(Bucket_Ptr buckets, int numPEs); extern void HPCC_InsertUpdate(u64Int ran, int pe, Bucket_Ptr buckets); extern int HPCC_GetUpdates(Bucket_Ptr buckets, u64Int *buffer, int bufferSize, int *peUpdates); hpcc-1.4.1/RandomAccess/core_single_cpu.c0000644000000000000000000001332211256503657015202 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* * This code has been contributed by the DARPA HPCS program. Contact * David Koester or Bob Lucas * if you have questions. * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK are limited to both benchmarks attempting to calculate a peak system * capability. 
* * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark,and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* * This file contains the computational core of the single cpu version * of GUPS. The inner loop should easily be vectorized by compilers * with such support. * * This core is used by both the single_cpu and star_single_cpu tests. */ #include #include "RandomAccess.h" /* Number of updates to table (suggested: 4x number of table entries) */ #define NUPDATE (4 * TableSize) static void RandomAccessUpdate(u64Int TableSize, u64Int *Table) { u64Int i; u64Int ran[128]; /* Current random numbers */ int j; /* Perform updates to main table. The scalar equivalent is: * * u64Int ran; * ran = 1; * for (i=0; ioutFname, "a" ); if (! 
outFile) { outFile = stderr; fprintf( outFile, "Cannot open output file.\n" ); return 1; } } /* calculate local memory per node for the update table */ totalMem = params->HPLMaxProcMem; totalMem /= sizeof(u64Int); /* calculate the size of update array (must be a power of 2) */ for (totalMem *= 0.5, logTableSize = 0, TableSize = 1; totalMem >= 1.0; totalMem *= 0.5, logTableSize++, TableSize <<= 1) ; /* EMPTY */ Table = HPCC_XMALLOC( u64Int, TableSize ); if (! Table) { if (doIO) { fprintf( outFile, "Failed to allocate memory for the update table (" FSTR64 ").\n", TableSize); fclose( outFile ); } return 1; } params->RandomAccess_N = (s64Int)TableSize; /* Print parameters for run */ if (doIO) { fprintf( outFile, "Main table size = 2^" FSTR64 " = " FSTR64 " words\n", logTableSize,TableSize); fprintf( outFile, "Number of updates = " FSTR64 "\n", NUPDATE); } /* Initialize main table */ for (i=0; i 0.0 ? 1.0 / realtime : -1.0); *GUPs *= 1e-9*NUPDATE; /* Print timing results */ if (doIO) { fprintf( outFile, "CPU time used = %.6f seconds\n", cputime); fprintf( outFile, "Real time used = %.6f seconds\n", realtime); fprintf( outFile, "%.9f Billion(10^9) Updates per second [GUP/s]\n", *GUPs ); } /* Verification of results (in serial or "safe" mode; optional) */ temp = 0x1; for (i=0; i or Bob Lucas * if you have questions. * * GUPS (Giga UPdates per Second) is a measurement that profiles the memory * architecture of a system and is a measure of performance similar to MFLOPS. * The HPCS HPCchallenge RandomAccess benchmark is intended to exercise the * GUPS capability of a system, much like the LINPACK benchmark is intended to * exercise the MFLOPS capability of a computer. In each case, we would * expect these benchmarks to achieve close to the "peak" capability of the * memory system. The extent of the similarities between RandomAccess and * LINPACK are limited to both benchmarks attempting to calculate a peak system * capability. 
* * GUPS is calculated by identifying the number of memory locations that can be * randomly updated in one second, divided by 1 billion (1e9). The term "randomly" * means that there is little relationship between one address to be updated and * the next, except that they occur in the space of one half the total system * memory. An update is a read-modify-write operation on a table of 64-bit words. * An address is generated, the value at that address read from memory, modified * by an integer operation (add, and, or, xor) with a literal value, and that * new value is written back to memory. * * We are interested in knowing the GUPS performance of both entire systems and * system subcomponents --- e.g., the GUPS rating of a distributed memory * multiprocessor the GUPS rating of an SMP node, and the GUPS rating of a * single processor. While there is typically a scaling of FLOPS with processor * count, a similar phenomenon may not always occur for GUPS. * * For additional information on the GUPS metric, the HPCchallenge RandomAccess * Benchmark,and the rules to run RandomAccess or modify it to optimize * performance -- see http://icl.cs.utk.edu/hpcc/ * */ /* * This file contains the computational core of the single cpu version * of GUPS. The inner loop should easily be vectorized by compilers * with such support. * * This core is used by both the single_cpu and star_single_cpu tests. */ #include #include "RandomAccess.h" /* Number of updates to table (suggested: 4x number of table entries) */ #define NUPDATE (4 * TableSize) static void RandomAccessUpdate_LCG(u64Int TableSize, u64Int *Table) { u64Int i; u64Int ran[128]; /* Current random numbers */ int j, logTableSize; /* Perform updates to main table. 
The scalar equivalent is: * * u64Int ran; * ran = 1; * for (i=0; i> (64 - logTableSize)] ^= ran; * } */ for (j=0; j<128; j++) ran[j] = HPCC_starts_LCG((NUPDATE/128) * j); logTableSize = 0; for (i = 1; i < TableSize; i <<= 1) logTableSize += 1; for (i=0; i> (64 - logTableSize)] ^= ran[j]; } } } int HPCC_RandomAccess_LCG(HPCC_Params *params, int doIO, double *GUPs, int *failure) { u64Int i; u64Int temp; double cputime; /* CPU time to update table */ double realtime; /* Real time to update table */ double totalMem; u64Int *Table; u64Int logTableSize, TableSize; FILE *outFile = NULL; if (doIO) { outFile = fopen( params->outFname, "a" ); if (! outFile) { outFile = stderr; fprintf( outFile, "Cannot open output file.\n" ); return 1; } } /* calculate local memory per node for the update table */ totalMem = params->HPLMaxProcMem; totalMem /= sizeof(u64Int); /* calculate the size of update array (must be a power of 2) */ for (totalMem *= 0.5, logTableSize = 0, TableSize = 1; totalMem >= 1.0; totalMem *= 0.5, logTableSize++, TableSize <<= 1) ; /* EMPTY */ Table = HPCC_XMALLOC( u64Int, TableSize ); if (! Table) { if (doIO) { fprintf( outFile, "Failed to allocate memory for the update table (" FSTR64 ").\n", TableSize); fclose( outFile ); } return 1; } params->RandomAccess_LCG_N = (s64Int)TableSize; /* Print parameters for run */ if (doIO) { fprintf( outFile, "Main table size = 2^" FSTR64 " = " FSTR64 " words\n", logTableSize,TableSize); fprintf( outFile, "Number of updates = " FSTR64 "\n", NUPDATE); } /* Initialize main table */ for (i=0; i 0.0 ? 
1.0 / realtime : -1.0); *GUPs *= 1e-9*NUPDATE; /* Print timing results */ if (doIO) { fprintf( outFile, "CPU time used = %.6f seconds\n", cputime); fprintf( outFile, "Real time used = %.6f seconds\n", realtime); fprintf( outFile, "%.9f Billion(10^9) Updates per second [GUP/s]\n", *GUPs ); } /* Verification of results (in serial or "safe" mode; optional) */ temp = 0x1; for (i=0; i> (64 - (int)logTableSize)] ^= temp; } temp = 0; for (i=0; i #include #include "heap.h" #include "pool.h" static Heap_Record_Ptr *heap; /* heap of records */ static int heapNodes; /* number of records in heap */ static int *IndexToHeapNode; /* aux array that keeps mapping from record's indices to heap nodes */ static POOL *Heap_Pool; /* memory for heap records */ #define LEFT(x) (((x) << 1) + 1) #define RIGHT(x) (((x) << 1) + 2) #define PARENT(x) (((x)-1) >> 1) #define MAP_INDEX_TO_HEAP_NODE(node) (IndexToHeapNode[heap[(node)]->index] = (node)) void HPCC_ra_Heap_Init (int size) { int i; heap = (Heap_Record_Ptr *) malloc (size * sizeof (Heap_Record_Ptr)); heapNodes = 0; IndexToHeapNode = (int *) malloc (size * sizeof (int)); for (i = 0; i < size; i ++) { IndexToHeapNode[i] = NOT_A_NODE; } /* initialize memory pool for heap nodes */ Heap_Pool = HPCC_PoolInit (size, sizeof(Heap_Record)); } void HPCC_ra_Heap_Insert (int index, int key) { Heap_Record_Ptr newNode; int node, parent; newNode = (Heap_Record*) HPCC_PoolGetObj(Heap_Pool); newNode->index = index; newNode->key = key; node = heapNodes; parent = PARENT(node); heapNodes ++; while (node != 0 && key > heap[parent]->key) { heap[node] = heap[parent]; MAP_INDEX_TO_HEAP_NODE(node); node = parent; parent = PARENT(node); } heap[node] = newNode; IndexToHeapNode[index] = node; } void HPCC_ra_Heap_IncrementKey (int index, int key) { int node; int parent; int child; int done; Heap_Record_Ptr tmp; node = IndexToHeapNode[index]; if (node != NOT_A_NODE) { heap[node]->key = heap[node]->key + 1; /* _ra_Heapify (node); */ done = 0; child = node; while 
(!done && child > 0) { parent = PARENT(child); if (parent >= 0 && heap[parent]->key < heap[child]->key) { tmp = heap[child]; heap[child] = heap[parent]; MAP_INDEX_TO_HEAP_NODE(child); heap[parent] = tmp; MAP_INDEX_TO_HEAP_NODE(parent); child = parent; } else done = 1; } /* end _ra_Heapify (node); */ } } void HPCC_ra_Heap_ExtractMax (int *index, int *key) { Heap_Record_Ptr nodePtr; int parent, child; nodePtr = heap[HEAP_ROOT]; *index = nodePtr->index; *key = nodePtr->key; HPCC_PoolReturnObj(Heap_Pool, nodePtr); heapNodes --; nodePtr = heap[heapNodes]; parent = HEAP_ROOT; child = LEFT(parent); while (child <= heapNodes) { if (child < heapNodes && heap[child]->key < heap[child+1]->key) child ++; if (nodePtr->key >= heap[child]->key) break; heap[parent] = heap[child]; MAP_INDEX_TO_HEAP_NODE(parent); parent = child; child = LEFT(child); } heap[parent] = nodePtr; MAP_INDEX_TO_HEAP_NODE(parent); } void HPCC_ra_Heapify(int node) { /* assumes that the key of a given node can only be increased */ int parent; int child; int done; Heap_Record_Ptr tmp; done = 0; child = node; while (!done && child > 0) { parent = PARENT(child); if (parent >= 0 && heap[parent]->key < heap[child]->key) { tmp = heap[child]; heap[child] = heap[parent]; MAP_INDEX_TO_HEAP_NODE(child); heap[parent] = tmp; MAP_INDEX_TO_HEAP_NODE(parent); child = parent; } else done = 1; } } void HPCC_ra_Heapify_r(int node) { /* assumes that the key of a given entry can only be increased */ int parent; int child; Heap_Record_Ptr tmp; if (node > 0) { child = node; parent = PARENT(child); if (parent >= 0 && heap[parent]->key < heap[child]->key) { tmp = heap[child]; heap[child] = heap[parent]; MAP_INDEX_TO_HEAP_NODE(child); heap[parent] = tmp; MAP_INDEX_TO_HEAP_NODE(parent); child = parent; HPCC_ra_Heapify(child); } } } void HPCC_ra_Heap_Free() { HPCC_PoolFree(Heap_Pool); free(Heap_Pool); free(IndexToHeapNode); free(heap); } hpcc-1.4.1/RandomAccess/heap.h0000644000000000000000000000075411256503657012771 00000000000000 
#define HEAP_ROOT 0 #define NOT_A_NODE (-1) typedef struct heap_record { char* poolNext; /* pointer for memory pool */ int index; int key; } Heap_Record, *Heap_Record_Ptr; extern void HPCC_ra_Heap_Init (int size); extern void HPCC_ra_Heap_Insert (int index, int key); extern void HPCC_ra_Heap_ExtractMax (int *index, int *key); extern void HPCC_ra_Heap_IncrementKey (int index, int key); extern void HPCC_ra_Heapify (int node); extern void HPCC_ra_Heap_Free (); hpcc-1.4.1/RandomAccess/pool.c0000644000000000000000000000412411256503657013013 00000000000000/* pool.c */ /* * POOL Operations: used to manage the allocation of memory. * In order to use a pool, the first element of each structure must be * a char pointer "poolNext", which maintain the pool lists. */ #include #include "pool.h" /* PoolInit: create a pool of objects */ POOL* HPCC_PoolInit(int numObjects, int objSize) { char* ptr; int i; POOL* poolPtr; poolPtr = (POOL *) malloc (1 * sizeof (POOL)); poolPtr->head = HPCC_NULL_PTR; /* Points to unallocated objects */ poolPtr->tail = HPCC_NULL_PTR; /* Points to unallocated objects */ poolPtr->numObjs = numObjects+1; /* Number of objects to allocate */ poolPtr->objSize = objSize; /* Size of each object */ ptr = (char*)malloc((poolPtr->numObjs)*(poolPtr->objSize)); /* Get a block of objects */ if (ptr == HPCC_NULL_PTR) { fprintf(stdout,"Malloc fails in PoolInit\n"); MPI_Abort( MPI_COMM_WORLD, -1 ); } poolPtr->poolBase = ptr; for(i = 0; i<(poolPtr->numObjs)-1; i++) { /* link together the new objects*/ *((char**)(ptr+i*(poolPtr->objSize))) = ptr+(i+1)*(poolPtr->objSize); /* setting up poolNext */ } poolPtr->head = ptr; poolPtr->tail = ptr + (poolPtr->numObjs - 1)*(poolPtr->objSize); /* adjust tail pointer */ *((char**)(poolPtr->tail)) = HPCC_NULL_PTR; /* last object has no next object */ return (poolPtr); } char *HPCC_PoolGetObj(POOL* poolPtr) { char *ptr; if (poolPtr->head == HPCC_NULL_PTR) { fprintf(stdout,"No unallocated objects in pool\n"); MPI_Abort( MPI_COMM_WORLD, 
-1 ); } ptr = poolPtr->head; poolPtr->head = *((char**)(poolPtr->head)); return ptr; } void HPCC_PoolReturnObj(POOL *poolPtr,void *optr) { if (poolPtr->tail) { *((char **) (poolPtr->tail)) = (char*)optr; *((char **) ((char *)optr)) = HPCC_NULL_PTR; poolPtr->tail = (char*)optr; } else { poolPtr->head = (char*)optr; poolPtr->tail = (char*)optr; *((char **) ((char *)optr)) = HPCC_NULL_PTR; } } void HPCC_PoolFree(POOL* poolPtr) { free(poolPtr->poolBase); } hpcc-1.4.1/RandomAccess/pool.h0000644000000000000000000000125511256503657013022 00000000000000 #define HPCC_NULL_PTR ((char *)0) typedef struct Pool_s { /* used to minimize the use of malloc */ char *head; /* pointer to the first element of the pool */ char *tail; /* pointer to the last element of the pool */ int numObjs; /* number of objects to malloc */ int objSize; /* size of objects in bytes */ char *poolBase; /* pointer to block of memory allocated for pool */ } POOL; extern POOL* HPCC_PoolInit(int numObjs, int objSize); extern char *HPCC_PoolGetObj(POOL *poolPtr); extern void HPCC_PoolReturnObj(POOL *poolPtr, void *objPtr); extern void HPCC_PoolFree(POOL *poolPtr); hpcc-1.4.1/RandomAccess/single_cpu.c0000644000000000000000000000350311256503657014172 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- * * This file contains the interface for the single cpu RandomAccess test. The * test is only run on a single (random) node in the MPI universe, with all * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected * CPU to finish the RandomAccess test. 
* * This test uses the computational core found in core_single_cpu.c */ #include #include "RandomAccess.h" int HPCC_SingleRandomAccess(HPCC_Params *params) { int myRank, commSize; int rv, errCount, rank, failure = 0; double localGUPs; double scl = 1.0 / RAND_MAX; FILE *outputFile = NULL; MPI_Comm comm = MPI_COMM_WORLD; localGUPs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); srand(time(NULL)); scl *= commSize; /* select a node at random, but not node 0 (unless there is just one node) */ if (1 == commSize) rank = 0; else for (rank = 0; ; rank = (int)(scl * rand())) { if (rank > 0 && rank < commSize) break; } MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ if (myRank == rank) /* if this node has been selected */ rv = HPCC_RandomAccess( params, 0 == myRank, &localGUPs, &failure ); MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ MPI_Bcast( &localGUPs, 1, MPI_DOUBLE, rank, comm ); /* broadcast GUPs */ MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ errCount = rv; params->SingleGUPs = localGUPs; if (failure) params->Failure = 1; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Node selected %d\n", rank ); fprintf( outputFile, "Single GUP/s %.6f\n", localGUPs ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/RandomAccess/single_cpu_lcg.c0000644000000000000000000000352011353467335015016 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- * * This file contains the interface for the single cpu RandomAccess test. The * test is only run on a single (random) node in the MPI universe, with all * other CPUs stuck (in theory, idle) in an MPI_Bcast waiting for the selected * CPU to finish the RandomAccess test. 
* * This test uses the computational core found in core_single_cpu.c */ #include #include "RandomAccess.h" int HPCC_SingleRandomAccess_LCG(HPCC_Params *params) { int myRank, commSize; int rv, errCount, rank, failure = 0; double localGUPs; double scl = 1.0 / RAND_MAX; FILE *outputFile = NULL; MPI_Comm comm = MPI_COMM_WORLD; localGUPs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); srand(time(NULL)); scl *= commSize; /* select a node at random, but not node 0 (unless there is just one node) */ if (1 == commSize) rank = 0; else for (rank = 0; ; rank = (int)(scl * rand())) { if (rank > 0 && rank < commSize) break; } MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ if (myRank == rank) /* if this node has been selected */ rv = HPCC_RandomAccess_LCG( params, 0 == myRank, &localGUPs, &failure ); MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ MPI_Bcast( &localGUPs, 1, MPI_DOUBLE, rank, comm ); /* broadcast GUPs */ MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ errCount = rv; params->Single_LCG_GUPs = localGUPs; if (failure) params->Failure = 1; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Node selected %d\n", rank ); fprintf( outputFile, "Single GUP/s %.6f\n", localGUPs ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/RandomAccess/star_single_cpu.c0000644000000000000000000000330711256503657015225 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- * * This file contains the interface for the star single cpu RandomAccess test. * The test runs on all cpus in the MPI universe, but there is no communication * between cpus during the process (each cpu runs its own version of the * single_cpu test). The final result is the average of the entire system. 
* * This test uses the computational core found in core_single_cpu.c */ #include #include "RandomAccess.h" int HPCC_StarRandomAccess(HPCC_Params *params) { int myRank, commSize; int rv, errCount, failure = 0, failureAll = 0; double minGUPs, avgGUPs, maxGUPs, localGUPs; FILE *outputFile = NULL; MPI_Comm comm = MPI_COMM_WORLD; minGUPs = avgGUPs = maxGUPs = localGUPs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); rv = HPCC_RandomAccess( params, 0 == myRank, &localGUPs, &failure ); MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); if (failureAll) params->Failure = 1; MPI_Reduce( &localGUPs, &minGUPs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &localGUPs, &avgGUPs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &localGUPs, &maxGUPs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); avgGUPs /= commSize; MPI_Bcast( &avgGUPs, 1, MPI_DOUBLE, 0, comm ); params->StarGUPs = avgGUPs; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Minimum GUP/s %.6f\n", minGUPs ); fprintf( outputFile, "Average GUP/s %.6f\n", avgGUPs ); fprintf( outputFile, "Maximum GUP/s %.6f\n", maxGUPs ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/RandomAccess/star_single_cpu_lcg.c0000644000000000000000000000332411353467335016051 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- * * This file contains the interface for the star single cpu RandomAccess test. * The test runs on all cpus in the MPI universe, but there is no communication * between cpus during the process (each cpu runs its own version of the * single_cpu test). The final result is the average of the entire system. 
* * This test uses the computational core found in core_single_cpu.c */ #include #include "RandomAccess.h" int HPCC_StarRandomAccess_LCG(HPCC_Params *params) { int myRank, commSize; int rv, errCount, failure = 0, failureAll = 0; double minGUPs, avgGUPs, maxGUPs, localGUPs; FILE *outputFile = NULL; MPI_Comm comm = MPI_COMM_WORLD; minGUPs = avgGUPs = maxGUPs = localGUPs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); rv = HPCC_RandomAccess_LCG( params, 0 == myRank, &localGUPs, &failure ); MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); MPI_Allreduce( &failure, &failureAll, 1, MPI_INT, MPI_MAX, comm ); if (failureAll) params->Failure = 1; MPI_Reduce( &localGUPs, &minGUPs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &localGUPs, &avgGUPs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &localGUPs, &maxGUPs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); avgGUPs /= commSize; MPI_Bcast( &avgGUPs, 1, MPI_DOUBLE, 0, comm ); params->Star_LCG_GUPs = avgGUPs; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Minimum GUP/s %.6f\n", minGUPs ); fprintf( outputFile, "Average GUP/s %.6f\n", avgGUPs ); fprintf( outputFile, "Maximum GUP/s %.6f\n", maxGUPs ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/RandomAccess/time_bound.c0000644000000000000000000004171111256503657014172 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* time_bound.c * * Estimates the largest number of updates that * will keep the benchmark's execution time under * a specified time bound. * * The number of updates is estimated by performing * a fraction (currently 1% as defined in time_bound.h) * of the default number of updates and * measuring the execution time. The maximum * number of updates then is estimated based on * the average execution time per update and the time * bound. 
* * */ #include #include "RandomAccess.h" #include "time_bound.h" #include "buckets.h" void HPCC_Power2NodesTime(u64Int logTableSize, u64Int TableSize, u64Int LocalTableSize, u64Int MinLocalTableSize, u64Int GlobalStartMyProc, u64Int Top, int logNumProcs, int NumProcs, int Remainder, int MyProc, MPI_Datatype INT64_DT, double timeBound, u64Int* EstimatedNumIter, MPI_Status *finish_statuses, MPI_Request *finish_req) { s64Int i, j; int proc_count; s64Int SendCnt; u64Int Ran; s64Int WhichPe; u64Int LocalOffset; int logLocalTableSize = logTableSize - logNumProcs; int NumberReceiving = NumProcs - 1; #ifdef USE_MULTIPLE_RECV int index, NumRecvs; MPI_Request inreq[MAX_RECV] = { MPI_REQUEST_NULL }; MPI_Request outreq = MPI_REQUEST_NULL; #else MPI_Request inreq, outreq = MPI_REQUEST_NULL; #endif int bufferBase; u64Int inmsg; MPI_Status status; int have_done; int pe; int pendingUpdates; int maxPendingUpdates; int localBufferSize; int peUpdates; int recvUpdates; Bucket_Ptr Buckets; double ra_LoopRealTime; double iterTime; /* Initialize main table */ for (i=0; i 4) ? (Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[inmsg & (LocalTableSize - 1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? POLY : ZERO64B); WhichPe = (Ran >> logLocalTableSize) & (NumProcs - 1); if (WhichPe == MyProc) { LocalOffset = (Ran & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates (Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; HPCC_Table[inmsg & (LocalTableSize - 1)] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates(Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j 4) ? 
(Mmin(4,MAX_RECV)) : 1; for (j = 0; j < NumRecvs; j++) MPI_Irecv(&LocalRecvBuffer[j*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[j]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif while (i < SendCnt) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... */ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); if (pendingUpdates < maxPendingUpdates) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? 
POLY : ZERO64B); GlobalOffset = Ran & (TableSize-1); if ( GlobalOffset < Top) WhichPe = ( GlobalOffset / (MinLocalTableSize + 1) ); else WhichPe = ( (GlobalOffset - Remainder) / MinLocalTableSize ); if (WhichPe == MyProc) { LocalOffset = (Ran & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= Ran; } else { HPCC_InsertUpdate(Ran, WhichPe, Buckets); pendingUpdates++; } i++; } else { MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates (Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } } /* send updates in buckets */ while (pendingUpdates > 0) { /* receive messages */ do { #ifdef USE_MULTIPLE_RECV MPI_Testany(NumRecvs, inreq, &index, &have_done, &status); #else MPI_Test(&inreq, &have_done, &status); #endif if (have_done) { if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index*LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j < recvUpdates; j ++) { inmsg = LocalRecvBuffer[bufferBase+j]; LocalOffset = (inmsg & (TableSize - 1)) - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= inmsg; } } else if (status.MPI_TAG == FINISHED_TAG) { /* we got a done message. Thanks for playing... 
*/ NumberReceiving--; } else { MPI_Abort( MPI_COMM_WORLD, -1 ); } #ifdef USE_MULTIPLE_RECV MPI_Irecv(&LocalRecvBuffer[index*LOCAL_BUFFER_SIZE], localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq[index]); #else MPI_Irecv(&LocalRecvBuffer, localBufferSize, INT64_DT, MPI_ANY_SOURCE, MPI_ANY_TAG, MPI_COMM_WORLD, &inreq); #endif } } while (have_done && NumberReceiving > 0); MPI_Test(&outreq, &have_done, MPI_STATUS_IGNORE); if (have_done) { outreq = MPI_REQUEST_NULL; pe = HPCC_GetUpdates (Buckets, LocalSendBuffer, localBufferSize, &peUpdates); MPI_Isend(&LocalSendBuffer, peUpdates, INT64_DT, (int)pe, UPDATE_TAG, MPI_COMM_WORLD, &outreq); pendingUpdates -= peUpdates; } } /* send our done messages */ for (proc_count = 0 ; proc_count < NumProcs ; ++proc_count) { if (proc_count == MyProc) { finish_req[MyProc] = MPI_REQUEST_NULL; continue; } /* send garbage - who cares, no one will look at it */ MPI_Isend(&Ran, 0, INT64_DT, proc_count, FINISHED_TAG, MPI_COMM_WORLD, finish_req + proc_count); } /* Finish everyone else up... */ while (NumberReceiving > 0) { #ifdef USE_MULTIPLE_RECV MPI_Waitany(NumRecvs, inreq, &index, &status); #else MPI_Wait(&inreq, &status); #endif if (status.MPI_TAG == UPDATE_TAG) { MPI_Get_count(&status, INT64_DT, &recvUpdates); #ifdef USE_MULTIPLE_RECV bufferBase = index * LOCAL_BUFFER_SIZE; #else bufferBase = 0; #endif for (j=0; j #include "RandomAccess.h" /* Utility routine to start random number generator at Nth step */ u64Int HPCC_starts(s64Int n) { int i, j; u64Int m2[64]; u64Int temp, ran; while (n < 0) n += PERIOD; while (n > PERIOD) n -= PERIOD; if (n == 0) return 0x1; temp = 0x1; for (i=0; i<64; i++) { m2[i] = temp; temp = (temp << 1) ^ ((s64Int) temp < 0 ? POLY : 0); temp = (temp << 1) ^ ((s64Int) temp < 0 ? 
POLY : 0); } for (i=62; i>=0; i--) if ((n >> i) & 1) break; ran = 0x2; while (i > 0) { temp = 0; for (j=0; j<64; j++) if ((ran >> j) & 1) temp ^= m2[j]; ran = temp; i -= 1; if ((n >> i) & 1) ran = (ran << 1) ^ ((s64Int) ran < 0 ? POLY : 0); } return ran; } /* Utility routine to start LCG random number generator at Nth step */ u64Int HPCC_starts_LCG(s64Int n) { u64Int mul_k, add_k, ran, un; mul_k = LCG_MUL64; add_k = LCG_ADD64; ran = 1; for (un = (u64Int)n; un; un >>= 1) { if (un & 1) ran = mul_k * ran + add_k; add_k *= (mul_k + 1); mul_k *= mul_k; } return ran; } hpcc-1.4.1/RandomAccess/verification.c0000644000000000000000000002003511256503657014523 00000000000000#include #include "RandomAccess.h" /* Verification phase: local buckets to sort into */ #define BUCKET_SIZE 1024 #define SLOT_CNT 1 #define FIRST_SLOT 2 void HPCC_Power2NodesMPIRandomAccessCheck(u64Int logTableSize, u64Int TableSize, u64Int LocalTableSize, u64Int GlobalStartMyProc, int logNumProcs, int NumProcs, int MyProc, u64Int ProcNumUpdates, MPI_Datatype UINT64_DT, s64Int *NumErrors) { u64Int Ran; u64Int RanTmp; s64Int NextSlot; s64Int WhichPe; s64Int PeBucketBase; s64Int SendCnt; s64Int errors; int i; int j; s64Int *PeCheckDone; int LocalAllDone = HPCC_FALSE; int sAbort, rAbort; u64Int *LocalBuckets; /* buckets used in verification phase */ u64Int *GlobalBuckets; /* buckets used in verification phase */ LocalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! LocalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for local buckets.\n"); goto failed_localbuckets; } GlobalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! 
GlobalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for global buckets.\n"); goto failed_globalbuckets; } SendCnt = ProcNumUpdates; /* SendCnt = 4 * LocalTableSize; */ Ran = HPCC_starts (4 * GlobalStartMyProc); PeCheckDone = XMALLOC ( s64Int, NumProcs); for (i=0; i 0) { /* Initalize local buckets */ for (i=0; i0 ) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? POLY : ZERO64B); WhichPe = (Ran >> (logTableSize - logNumProcs)) & (NumProcs - 1); PeBucketBase = WhichPe * (BUCKET_SIZE+FIRST_SLOT); NextSlot = LocalBuckets[PeBucketBase+SLOT_CNT]; LocalBuckets[PeBucketBase+NextSlot] = Ran; LocalBuckets[PeBucketBase+SLOT_CNT] = ++NextSlot; SendCnt--; } if (SendCnt == 0) for (i=0; i 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for local buckets.\n"); goto failed_localbuckets; } GlobalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! GlobalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for global buckets.\n"); goto failed_globalbuckets; } SendCnt = ProcNumUpdates; /* SendCnt = 4 * LocalTableSize; */ Ran = HPCC_starts (4 * GlobalStartMyProc); PeCheckDone = XMALLOC (s64Int, NumProcs); for (i=0; i 0) { /* Initalize local buckets */ for (i=0; i0 ) { Ran = (Ran << 1) ^ ((s64Int) Ran < ZERO64B ? 
POLY : ZERO64B); GlobalOffset = Ran & (TableSize-1); if ( GlobalOffset < Top) WhichPe = ( GlobalOffset / (MinLocalTableSize + 1) ); else WhichPe = ( (GlobalOffset - Remainder) / MinLocalTableSize ); PeBucketBase = WhichPe * (BUCKET_SIZE+FIRST_SLOT); NextSlot = LocalBuckets[PeBucketBase+SLOT_CNT]; LocalBuckets[PeBucketBase+NextSlot] = Ran; LocalBuckets[PeBucketBase+SLOT_CNT] = ++NextSlot; SendCnt--; } if (SendCnt == 0) for (i=0; i #include "RandomAccess.h" /* Verification phase: local buckets to sort into */ #define BUCKET_SIZE 1024 #define SLOT_CNT 1 #define FIRST_SLOT 2 void HPCC_Power2NodesMPIRandomAccessCheck_LCG(u64Int logTableSize, u64Int TableSize, u64Int LocalTableSize, u64Int GlobalStartMyProc, int logNumProcs, int NumProcs, int MyProc, u64Int ProcNumUpdates, MPI_Datatype UINT64_DT, s64Int *NumErrors) { u64Int Ran; u64Int RanTmp; s64Int NextSlot; s64Int WhichPe; s64Int PeBucketBase; s64Int SendCnt; s64Int errors; int i; int j; s64Int *PeCheckDone; int LocalAllDone = HPCC_FALSE; int sAbort, rAbort; u64Int *LocalBuckets; /* buckets used in verification phase */ u64Int *GlobalBuckets; /* buckets used in verification phase */ LocalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! LocalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for local buckets.\n"); goto failed_localbuckets; } GlobalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! 
GlobalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for global buckets.\n"); goto failed_globalbuckets; } SendCnt = ProcNumUpdates; /* SendCnt = 4 * LocalTableSize; */ Ran = HPCC_starts_LCG(4 * GlobalStartMyProc); PeCheckDone = XMALLOC ( s64Int, NumProcs); for (i=0; i 0) { /* Initalize local buckets */ for (i=0; i0 ) { Ran = LCG_MUL64 * Ran + LCG_ADD64; WhichPe = (Ran >> (64 - logNumProcs)) & (NumProcs - 1); PeBucketBase = WhichPe * (BUCKET_SIZE+FIRST_SLOT); NextSlot = LocalBuckets[PeBucketBase+SLOT_CNT]; LocalBuckets[PeBucketBase+NextSlot] = Ran; LocalBuckets[PeBucketBase+SLOT_CNT] = ++NextSlot; SendCnt--; } if (SendCnt == 0) for (i=0; i> (64 - logTableSize)) & (LocalTableSize-1)] ^= RanTmp; } LocalAllDone &= PeCheckDone[i]; } } } errors = 0; for (i=0; i 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for local buckets.\n"); goto failed_localbuckets; } GlobalBuckets = XMALLOC( u64Int, (NumProcs*(BUCKET_SIZE+FIRST_SLOT))); sAbort = 0; if (! 
GlobalBuckets) sAbort = 1; MPI_Allreduce( &sAbort, &rAbort, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD ); if (rAbort > 0) { if (MyProc == 0) fprintf(stderr, "Failed to allocate memory for global buckets.\n"); goto failed_globalbuckets; } SendCnt = ProcNumUpdates; /* SendCnt = 4 * LocalTableSize; */ Ran = HPCC_starts_LCG(4 * GlobalStartMyProc); PeCheckDone = XMALLOC (s64Int, NumProcs); for (i=0; i 0) { /* Initalize local buckets */ for (i=0; i0 ) { Ran = LCG_MUL64 * Ran + LCG_ADD64; GlobalOffset = Ran >> (64 - logTableSize); if ( GlobalOffset < Top) WhichPe = ( GlobalOffset / (MinLocalTableSize + 1) ); else WhichPe = ( (GlobalOffset - Remainder) / MinLocalTableSize ); PeBucketBase = WhichPe * (BUCKET_SIZE+FIRST_SLOT); NextSlot = LocalBuckets[PeBucketBase+SLOT_CNT]; LocalBuckets[PeBucketBase+NextSlot] = Ran; LocalBuckets[PeBucketBase+SLOT_CNT] = ++NextSlot; SendCnt--; } if (SendCnt == 0) for (i=0; i> (64 - logTableSize); LocalOffset = GlobalOffset - GlobalStartMyProc; HPCC_Table[LocalOffset] ^= RanTmp; } LocalAllDone &= PeCheckDone[i]; } } } /* no more local data */ errors = 0; for (i=0; i int HPCC_StarStream(HPCC_Params *params) { int myRank, commSize; int rv, errCount, failure = 0, failureAll = 0; double copyLocalGBs, copyMinGBs, copyMaxGBs, copyAvgGBs; double scaleLocalGBs, scaleMinGBs, scaleMaxGBs, scaleAvgGBs; double addLocalGBs, addMinGBs, addMaxGBs, addAvgGBs; double triadLocalGBs, triadMinGBs, triadMaxGBs, triadAvgGBs; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; copyLocalGBs = copyMinGBs = copyMaxGBs = copyAvgGBs = scaleLocalGBs = scaleMinGBs = scaleMaxGBs = scaleAvgGBs = addLocalGBs = addMinGBs = addMaxGBs = addAvgGBs = triadLocalGBs = triadMinGBs = triadMaxGBs = triadAvgGBs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); rv = HPCC_Stream( params, 0 == myRank, ©LocalGBs, &scaleLocalGBs, &addLocalGBs, &triadLocalGBs, &failure ); MPI_Reduce( &rv, &errCount, 1, MPI_INT, MPI_SUM, 0, comm ); MPI_Allreduce( &failure, &failureAll, 1, 
MPI_INT, MPI_MAX, comm ); if (failureAll) params->Failure = 1; MPI_Reduce( ©LocalGBs, ©MinGBs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( ©LocalGBs, ©AvgGBs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( ©LocalGBs, ©MaxGBs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); copyAvgGBs /= commSize; MPI_Reduce( &scaleLocalGBs, &scaleMinGBs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &scaleLocalGBs, &scaleAvgGBs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &scaleLocalGBs, &scaleMaxGBs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); scaleAvgGBs /= commSize; MPI_Reduce( &addLocalGBs, &addMinGBs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &addLocalGBs, &addAvgGBs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &addLocalGBs, &addMaxGBs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); addAvgGBs /= commSize; MPI_Reduce( &triadLocalGBs, &triadMinGBs, 1, MPI_DOUBLE, MPI_MIN, 0, comm ); MPI_Reduce( &triadLocalGBs, &triadAvgGBs, 1, MPI_DOUBLE, MPI_SUM, 0, comm ); MPI_Reduce( &triadLocalGBs, &triadMaxGBs, 1, MPI_DOUBLE, MPI_MAX, 0, comm ); triadAvgGBs /= commSize; MPI_Bcast( ©AvgGBs, 1, MPI_DOUBLE, 0, comm ); params->StarStreamCopyGBs = copyAvgGBs; MPI_Bcast( &scaleAvgGBs, 1, MPI_DOUBLE, 0, comm ); params->StarStreamScaleGBs = scaleAvgGBs; MPI_Bcast( &addAvgGBs, 1, MPI_DOUBLE, 0, comm ); params->StarStreamAddGBs = addAvgGBs; MPI_Bcast( &triadAvgGBs, 1, MPI_DOUBLE, 0, comm ); params->StarStreamTriadGBs = triadAvgGBs; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Minimum Copy GB/s %.6f\n", copyMinGBs ); fprintf( outputFile, "Average Copy GB/s %.6f\n", copyAvgGBs ); fprintf( outputFile, "Maximum Copy GB/s %.6f\n", copyMaxGBs ); fprintf( outputFile, "Minimum Scale GB/s %.6f\n", scaleMinGBs ); fprintf( outputFile, "Average Scale GB/s %.6f\n", scaleAvgGBs ); fprintf( outputFile, "Maximum Scale GB/s %.6f\n", scaleMaxGBs ); fprintf( outputFile, "Minimum Add GB/s %.6f\n", addMinGBs ); fprintf( outputFile, "Average Add GB/s 
%.6f\n", addAvgGBs ); fprintf( outputFile, "Maximum Add GB/s %.6f\n", addMaxGBs ); fprintf( outputFile, "Minimum Triad GB/s %.6f\n", triadMinGBs ); fprintf( outputFile, "Average Triad GB/s %.6f\n", triadAvgGBs ); fprintf( outputFile, "Maximum Triad GB/s %.6f\n", triadMaxGBs ); END_IO( myRank, outputFile ); return 0; } int HPCC_SingleStream(HPCC_Params *params) { int myRank, commSize; int rv, errCount, rank, failure = 0; double copyLocalGBs, scaleLocalGBs, addLocalGBs, triadLocalGBs; double scl = 1.0 / RAND_MAX; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; copyLocalGBs = scaleLocalGBs = addLocalGBs = triadLocalGBs = 0.0; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); srand(time(NULL)); scl *= commSize; /* select a node at random, but not node 0 (unless there is just one node) */ if (1 == commSize) rank = 0; else for (rank = 0; ; rank = (int)(scl * rand())) { if (rank > 0 && rank < commSize) break; } MPI_Bcast( &rank, 1, MPI_INT, 0, comm ); /* broadcast the rank selected on node 0 */ if (myRank == rank) /* if this node has been selected */ rv = HPCC_Stream( params, 0 == myRank, ©LocalGBs, &scaleLocalGBs, &addLocalGBs, &triadLocalGBs, &failure ); MPI_Bcast( &rv, 1, MPI_INT, rank, comm ); /* broadcast error code */ MPI_Bcast( &failure, 1, MPI_INT, rank, comm ); /* broadcast failure indication */ if (failure) params->Failure = 1; /* broadcast results */ MPI_Bcast( ©LocalGBs, 1, MPI_DOUBLE, rank, comm ); MPI_Bcast( &scaleLocalGBs, 1, MPI_DOUBLE, rank, comm ); MPI_Bcast( &addLocalGBs, 1, MPI_DOUBLE, rank, comm ); MPI_Bcast( &triadLocalGBs, 1, MPI_DOUBLE, rank, comm ); errCount = rv; params->SingleStreamCopyGBs = copyLocalGBs; params->SingleStreamScaleGBs = scaleLocalGBs; params->SingleStreamAddGBs = addLocalGBs; params->SingleStreamTriadGBs = triadLocalGBs; BEGIN_IO( myRank, params->outFname, outputFile); fprintf( outputFile, "Node(s) with error %d\n", errCount ); fprintf( outputFile, "Node selected %d\n", rank ); fprintf( outputFile, "Single 
STREAM Copy GB/s %.6f\n", copyLocalGBs ); fprintf( outputFile, "Single STREAM Scale GB/s %.6f\n", scaleLocalGBs ); fprintf( outputFile, "Single STREAM Add GB/s %.6f\n", addLocalGBs ); fprintf( outputFile, "Single STREAM Triad GB/s %.6f\n", triadLocalGBs ); END_IO( myRank, outputFile ); return 0; } hpcc-1.4.1/STREAM/stream.c0000644000000000000000000003344211256503657011773 00000000000000/*-----------------------------------------------------------------------*/ /* Program: Stream */ /* Revision: $Id$ */ /* Original code developed by John D. McCalpin */ /* Programmers: John D. McCalpin */ /* Joe R. Zagar */ /* */ /* This program measures memory transfer rates in GB/s for simple */ /* computational kernels coded in C. */ /*-----------------------------------------------------------------------*/ /* Copyright 1991-2003: John D. McCalpin */ /*-----------------------------------------------------------------------*/ /* License: */ /* 1. You are free to use this program and/or to redistribute */ /* this program. */ /* 2. You are free to modify this program for your own use, */ /* including commercial use, subject to the publication */ /* restrictions in item 3. */ /* 3. You are free to publish results obtained from running this */ /* program, or from works that you derive from this program, */ /* with the following limitations: */ /* 3a. In order to be referred to as "STREAM benchmark results", */ /* published results must be in conformance to the STREAM */ /* Run Rules, (briefly reviewed below) published at */ /* http://www.cs.virginia.edu/stream/ref.html */ /* and incorporated herein by reference. */ /* As the copyright holder, John McCalpin retains the */ /* right to determine conformity with the Run Rules. */ /* 3b. Results based on modified source code or on runs not in */ /* accordance with the STREAM Run Rules must be clearly */ /* labelled whenever they are published. 
Examples of */ /* proper labelling include: */ /* "tuned STREAM benchmark results" */ /* "based on a variant of the STREAM benchmark code" */ /* Other comparable, clear and reasonable labelling is */ /* acceptable. */ /* 3c. Submission of results to the STREAM benchmark web site */ /* is encouraged, but not required. */ /* 4. Use of this program or creation of derived works based on this */ /* program constitutes acceptance of these licensing restrictions. */ /* 5. Absolutely no warranty is expressed or implied. */ /*-----------------------------------------------------------------------*/ #include #include #include #ifdef _OPENMP #include #endif #define TUNED 1 #define VERBOSE 1 /* INSTRUCTIONS: * * 1) Stream requires a good bit of memory to run. Adjust the * value of 'N' (below) to give a 'timing calibration' of * at least 20 clock-ticks. This will provide rate estimates * that should be good to about 5% precision. */ static int VectorSize; # define N 2000000 # define NTIMES 10 # define OFFSET 0 /* * 3) Compile the code with full optimization. Many compilers * generate unreasonably bad code before the optimizer tightens * things up. If the results are unreasonably good, on the * other hand, the optimizer might be too smart for me! * * Try compiling with: * cc -O stream_omp.c -o stream_omp * * This is known to work on Cray, SGI, IBM, and Sun machines. * * * 4) Mail the results to mccalpin@cs.virginia.edu * Be sure to include: * a) computer hardware model number and software revision * b) the compiler flags * c) all of the output from the test case. * Thanks! 
* */ # define HLINE "-------------------------------------------------------------\n" static double *a, *b, *c; static double avgtime[4] = {0}, maxtime[4] = {0}, mintime[4] = {FLT_MAX,FLT_MAX,FLT_MAX,FLT_MAX}; static char *label[4] = {"Copy: ", "Scale: ", "Add: ", "Triad: "}; static double bytes[4] = { 2 * sizeof(double), 2 * sizeof(double), 3 * sizeof(double), 3 * sizeof(double) }; #define mysecond MPI_Wtime #ifdef TUNED extern void tuned_STREAM_Copy(void); extern void tuned_STREAM_Scale(double scalar); extern void tuned_STREAM_Add(void); extern void tuned_STREAM_Triad(double scalar); #endif static void checkSTREAMresults (FILE *outFile, int doIO, int *failure) { double aj,bj,cj,scalar; double asum,bsum,csum; double epsilon; int j,k; /* reproduce initialization */ aj = 1.0; bj = 2.0; cj = 0.0; /* a[] is modified during timing check */ aj = 2.0E0 * aj; /* now execute timing loop */ scalar = 3.0; for (k=0; k epsilon) { if (doIO) { fprintf( outFile, "Failed Validation on array a[]\n"); fprintf( outFile, " Expected : %f \n",aj); fprintf( outFile, " Observed : %f \n",asum); } } else if (fabs(bj-bsum)/bsum > epsilon) { if (doIO) { fprintf( outFile, "Failed Validation on array b[]\n"); fprintf( outFile, " Expected : %f \n",bj); fprintf( outFile, " Observed : %f \n",bsum); } } else if (fabs(cj-csum)/csum > epsilon) { if (doIO) { fprintf( outFile, "Failed Validation on array c[]\n"); fprintf( outFile, " Expected : %f \n",cj); fprintf( outFile, " Observed : %f \n",csum); } } else { *failure = 0; if (doIO) fprintf( outFile, "Solution Validates\n"); } } # define M 20 static int checktick() { int i, minDelta, Delta; double t1, t2, timesfound[M]; /* Collect a sequence of M unique time values from the system. */ for (i = 0; i < M; i++) { t1 = mysecond(); while( ((t2=mysecond()) - t1) < 1.0E-6 ) ; timesfound[i] = t1 = t2; } /* * Determine the minimum difference between these M values. * This result will be our estimate (in microseconds) for the * clock granularity. 
*/ minDelta = 1000000; for (i = 1; i < M; i++) { Delta = (int)( 1.0E6 * (timesfound[i]-timesfound[i-1])); minDelta = Mmin(minDelta, Mmax(Delta,0)); } return(minDelta); } #undef M int HPCC_Stream(HPCC_Params *params, int doIO, double *copyGBs, double *scaleGBs, double *addGBs, double *triadGBs, int *failure) { int quantum; int BytesPerWord; register int j, k; double scalar, t, times[4][NTIMES]; FILE *outFile; double GiBs = 1073741824.0, curGBs; if (doIO) { outFile = fopen( params->outFname, "a" ); if (! outFile) { outFile = stderr; fprintf( outFile, "Cannot open output file.\n" ); return 1; } } VectorSize = HPCC_LocalVectorSize( params, 3, sizeof(double), 0 ); /* Need 3 vectors */ params->StreamVectorSize = VectorSize; a = HPCC_XMALLOC( double, VectorSize ); b = HPCC_XMALLOC( double, VectorSize ); c = HPCC_XMALLOC( double, VectorSize ); if (!a || !b || !c) { if (c) HPCC_free(c); if (b) HPCC_free(b); if (a) HPCC_free(a); if (doIO) { fprintf( outFile, "Failed to allocate memory (%d).\n", VectorSize ); fflush( outFile ); fclose( outFile ); } return 1; } /* --- SETUP --- determine precision and check timing --- */ if (doIO) { fprintf( outFile, HLINE); BytesPerWord = sizeof(double); fprintf( outFile, "This system uses %d bytes per DOUBLE PRECISION word.\n", BytesPerWord); fprintf( outFile, HLINE); fprintf( outFile, "Array size = %d, Offset = %d\n" , VectorSize, OFFSET); fprintf( outFile, "Total memory required = %.4f GiB.\n", (3.0 * BytesPerWord) * ( (double) VectorSize / GiBs)); fprintf( outFile, "Each test is run %d times, but only\n", NTIMES); fprintf( outFile, "the *best* time for each is used.\n"); } #ifdef _OPENMP if (doIO) fprintf( outFile, HLINE); #pragma omp parallel private(k) { #pragma omp single nowait { k = omp_get_num_threads(); if (doIO) fprintf( outFile, "Number of Threads requested = %i\n",k); params->StreamThreads = k; } } #endif /* Get initial value for system clock. 
*/ #ifdef _OPENMP #pragma omp parallel for #endif for (j=0; j= 1) { if (doIO) fprintf( outFile, "Your clock granularity/precision appears to be " "%d microseconds.\n", quantum); } else { if (doIO) fprintf( outFile, "Your clock granularity appears to be " "less than one microsecond.\n"); } t = mysecond(); #ifdef _OPENMP #pragma omp parallel for #endif for (j = 0; j < VectorSize; j++) a[j] = 2.0E0 * a[j]; t = 1.0E6 * (mysecond() - t); if (doIO) { fprintf( outFile, "Each test below will take on the order" " of %d microseconds.\n", (int) t ); fprintf( outFile, " (= %d clock ticks)\n", (int) (t/quantum) ); fprintf( outFile, "Increase the size of the arrays if this shows that\n"); fprintf( outFile, "you are not getting at least 20 clock ticks per test.\n"); fprintf( outFile, HLINE); fprintf( outFile, "WARNING -- The above is only a rough guideline.\n"); fprintf( outFile, "For best results, please be sure you know the\n"); fprintf( outFile, "precision of your system timer.\n"); fprintf( outFile, HLINE); } /* --- MAIN LOOP --- repeat test cases NTIMES times --- */ scalar = 3.0; for (k=0; k 0.0 ? 1.0 / mintime[j] : -1.0); curGBs *= 1e-9 * bytes[j] * VectorSize; if (doIO) fprintf( outFile, "%s%11.4f %11.4f %11.4f %11.4f\n", label[j], curGBs, avgtime[j], mintime[j], maxtime[j]); switch (j) { case 0: *copyGBs = curGBs; break; case 1: *scaleGBs = curGBs; break; case 2: *addGBs = curGBs; break; case 3: *triadGBs = curGBs; break; } } if (doIO) fprintf( outFile, HLINE); /* --- Check Results --- */ checkSTREAMresults( outFile, doIO, failure ); if (doIO) fprintf( outFile, HLINE); HPCC_free(c); HPCC_free(b); HPCC_free(a); if (doIO) { fflush( outFile ); fclose( outFile ); } return 0; } void tuned_STREAM_Copy() { int j; #ifdef _OPENMP #pragma omp parallel for #endif for (j=0; j= 1) 1 # of panels in recursion 2 NDIVs 1 # of recursive panel fact. 
1 RFACTs (0=left, 1=Crout, 2=Right) 1 # of broadcast 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) 1 # of lookahead depth 1 DEPTHs (>=0) 2 SWAP (0=bin-exch,1=long,2=mix) 64 swapping threshold 0 L1 in (0=transposed,1=no-transposed) form 0 U in (0=transposed,1=no-transposed) form 1 Equilibration (0=no,1=yes) 8 memory alignment in double (> 0) ##### This line (no. 32) is ignored (it serves as a separator). ###### 0 Number of additional problem sizes for PTRANS 1200 10000 30000 values of N 0 number of additional blocking sizes for PTRANS 40 9 8 13 13 20 16 32 64 values of NB hpcc-1.4.1/hpl/BUGS0000644000000000000000000000054211256503657010602 00000000000000============================================================== List of the known problems with the HPL software Current as of release 2.0 - September 10, 2008 ============================================================== ============================================================== ============================================================== hpcc-1.4.1/hpl/COPYRIGHT0000644000000000000000000000615311256503657011416 00000000000000====================================================================== -- High Performance Computing Linpack Benchmark (HPL) HPL - 2.0 - September 10, 2008 Antoine P. Petitet University of Tennessee, Knoxville Innovative Computing Laboratory (C) Copyright 2000-2008 All Rights Reserved -- Copyright notice and Licensing terms: Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. 
All advertising materials mentioning features or use of this software must display the following acknowledgement: This product includes software developed at the University of Tennessee, Knoxville, Innovative Computing Laboratory. 4. The name of the University, the name of the Laboratory, or the names of its contributors may not be used to endorse or promote products derived from this software without specific written permission. -- Disclaimer: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. ====================================================================== hpcc-1.4.1/hpl/HISTORY0000644000000000000000000000502211256503657011201 00000000000000============================================================== High Performance Computing Linpack Benchmark (HPL) HPL 2.0 - September 10, 2008 ============================================================== History - 09/09/00 Public release of Version 1.0 - 09/27/00 A couple of mistakes in the VSIPL port have been corrected. The tar file as well as the web site were updated on September 27th, 2000. Note that these problems were not affecting the BLAS version of the software in any way. - 01/01/04 Version 1.0a The MPI process grid numbering scheme is now an run-time option. 
The inlined assembly timer routine that caused the compila- tion to fail when using gcc version 3.3 and above has been removed from the package. Various building problems on the T3E have been fixed; Thanks to Edward Anderson. - 15/12/04 Version 1.0b Weakness of the pseudo-random matrix generator found for pro- blem sizes being power of twos and larger than 2^15; Thanks to Gregory Bauer. This problem has not been fixed. It is thus currently recommended to HPL users willing to test matrices of size larger than 2^15 to not use power twos. When the matrix size is such that one needs > 16 GB per MPI rank, the intermediate calculation (mat.ld+1) * mat.nq in HPL_pdtest.c ends up overflowing because it is done using 32-bit arithmetic. This issue has been fixed by typecasting to size_t; Thanks to John Baron. - 09/10/08 Version 2.0 Piotr Luszczek changed to 64-bit RNG, modified files: -- [M] include/hpl_matgen.h -- [M] testing/matgen/HPL_ladd.c -- [M] testing/matgen/HPL_lmul.c -- [M] testing/matgen/HPL_rand.c -- [M] testing/ptest/HPL_pdinfo.c For a motivation for the change, see: Dongarra and Langou, ``The Problem with the Linpack Benchmark Matrix Generator'', LAWN 206, June 2008. -- [M] testing/ptest/HPL_pdtest.c -- Julien Langou changed the test for correctness from ||Ax-b||_oo / ( eps * ||A||_1 * N ) ||Ax-b||_oo / ( eps * ||A||_1 * ||x||_1 ) ||Ax-b||_oo / ( eps * ||A||_oo * ||x||_oo * N ) to the normwise backward error || r ||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) See: Nicholas J. Higham, ``Accuracy and Stability of Numerical Algorithms'', Society for Industrial and Applied Mathematics, Philadelphia, PA, USA, Second Edition, pages = xxx+680, ISBN = 0-89871-521-0, 2002. Note that in our case || b ||_oo is almost for sure 1/2, we compute it anyway. 
============================================================== hpcc-1.4.1/hpl/INSTALL0000644000000000000000000000602111256503657011146 00000000000000============================================================== High Performance Computing Linpack Benchmark (HPL) HPL 2.0 - September 10, 2008 ============================================================== 1) Retrieve the tar file, then gunzip hpl.tgz; tar -xvf hpl.tar this will create an hpl directory, that we call below the top-level directory. 2) Create a file Make.<arch> in the top-level directory. For this purpose, you may want to re-use one contained in the setup directory. This file essentially contains the compilers and libraries with their paths to be used. 3) Type "make arch=<arch>". This should create an executable in the bin/<arch> directory called xhpl. For example, on our Linux PII cluster, I create a file called Make.Linux_PII in the top-level directory. Then, I type "make arch=Linux_PII" This creates the executable file bin/Linux_PII/xhpl. 4) Quick check: run a few tests: cd bin/<arch> mpirun -np 4 xhpl 5) Tuning: Most of the performance parameters can be tuned, by modifying the input file bin/HPL.dat. See the file TUNING in the top-level directory. ============================================================== Compile time options: At the end of the "model" Make.<arch>, the user is given the opportunity to compile the software with some specific compile options. The list of these options and their meanings is: -DHPL_COPY_L force the copy of the panel L before bcast; -DHPL_CALL_CBLAS call the cblas interface; -DHPL_CALL_VSIPL call the vsip library; -DHPL_DETAILED_TIMING enables detailed timers; The user must choose between either the BLAS Fortran 77 interface, or the BLAS C interface, or the VSIPL library depending on which computational kernels are available on his system. Only one of these options should be selected.
If you choose the BLAS Fortran 77 interface, it is necessary to fill out the machine-specific C to Fortran 77 interface section of the Make. file. To do this, please refer to the Make. examples contained in the setup directory. By default HPL will: *) not copy L before broadcast, *) call the BLAS Fortran 77 interface, *) not display detailed timing information. As an example, suppose one wants HPL to copy the panel of columns into a contiguous buffer before broadcasting. In theory, it would be more efficient to let HPL create the appropriate MPI user-defined data type since this may avoid the data copy. So, it is a strange idea, but one insists. To achieve this one would add -DHPL_COPY_L to the definition of HPL_OPTS at the end of the file Make.. Issue then a "make clean arch=; make build arch=" and the xhpl executable will be re-build with that feature in. ============================================================== Check out the website www.netlib.org/benchmark/hpl for the latest information. ============================================================== hpcc-1.4.1/hpl/Make.UNKNOWN0000644000000000000000000000035411256503657012056 00000000000000# -*- Makefile -*- RM = exit CD = exit arch=UNKNOWN UNKNOWN: @echo @echo Please specify "'"arch"'" variable, for example: @echo 1. Create file Make.Unix in "'"hpl"'" directory @echo 2. Type: make arch=Unix @echo .PHONY: UNKNOWN hpcc-1.4.1/hpl/Make.top0000644000000000000000000002127211256503657011523 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # arch = UNKNOWN # include Make.$(arch) # ## build ############################################################### # build_src : ( $(CD) src/auxil/$(arch); $(MAKE) ) ( $(CD) src/blas/$(arch); $(MAKE) ) ( $(CD) src/comm/$(arch); $(MAKE) ) ( $(CD) src/grid/$(arch); $(MAKE) ) ( $(CD) src/panel/$(arch); $(MAKE) ) ( $(CD) src/pauxil/$(arch); $(MAKE) ) ( $(CD) src/pfact/$(arch); $(MAKE) ) ( $(CD) src/pgesv/$(arch); $(MAKE) ) # build_tst : ( $(CD) testing/matgen/$(arch); $(MAKE) ) ( $(CD) testing/timer/$(arch); $(MAKE) ) ( $(CD) testing/pmatgen/$(arch); $(MAKE) ) ( $(CD) testing/ptimer/$(arch); $(MAKE) ) ( $(CD) testing/ptest/$(arch); $(MAKE) ) #( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' ) # ## startup ############################################################# # startup_dir : - $(MKDIR) include/$(arch) - $(MKDIR) lib - $(MKDIR) lib/$(arch) - $(MKDIR) bin - $(MKDIR) bin/$(arch) # startup_src : - $(MAKE) -f Make.top leaf le=src/auxil arch=$(arch) - $(MAKE) -f Make.top leaf le=src/blas arch=$(arch) - $(MAKE) -f Make.top leaf le=src/comm arch=$(arch) - $(MAKE) -f Make.top leaf le=src/grid arch=$(arch) - $(MAKE) -f Make.top leaf le=src/panel arch=$(arch) - $(MAKE) -f Make.top leaf le=src/pauxil arch=$(arch) - $(MAKE) -f Make.top leaf le=src/pfact arch=$(arch) - $(MAKE) -f Make.top leaf le=src/pgesv arch=$(arch) # startup_tst : - $(MAKE) -f Make.top leaf le=testing/matgen arch=$(arch) - $(MAKE) -f Make.top leaf le=testing/timer arch=$(arch) - $(MAKE) -f Make.top leaf le=testing/pmatgen arch=$(arch) - $(MAKE) -f Make.top leaf le=testing/ptimer arch=$(arch) - $(MAKE) -f Make.top leaf le=testing/ptest arch=$(arch) #- SPMS_make_make`' -f Make.top leaf le=testing/test arch=$(arch) # ## refresh ############################################################# # refresh_src : - $(CP) makes/Make.auxil src/auxil/$(arch)/Makefile - $(CP) makes/Make.blas src/blas/$(arch)/Makefile - $(CP) makes/Make.comm 
src/comm/$(arch)/Makefile - $(CP) makes/Make.grid src/grid/$(arch)/Makefile - $(CP) makes/Make.panel src/panel/$(arch)/Makefile - $(CP) makes/Make.pauxil src/pauxil/$(arch)/Makefile - $(CP) makes/Make.pfact src/pfact/$(arch)/Makefile - $(CP) makes/Make.pgesv src/pgesv/$(arch)/Makefile # refresh_tst : - $(CP) makes/Make.matgen testing/matgen/$(arch)/Makefile - $(CP) makes/Make.timer testing/timer/$(arch)/Makefile - $(CP) makes/Make.pmatgen testing/pmatgen/$(arch)/Makefile - $(CP) makes/Make.ptimer testing/ptimer/$(arch)/Makefile - $(CP) makes/Make.ptest testing/ptest/$(arch)/Makefile #- SPMS_make_cp`' makes/Make.test testing/test/$(arch)/Makefile # ## clean ############################################################### # clean_src : - ( $(CD) src/auxil/$(arch); $(MAKE) clean ) - ( $(CD) src/blas/$(arch); $(MAKE) clean ) - ( $(CD) src/comm/$(arch); $(MAKE) clean ) - ( $(CD) src/grid/$(arch); $(MAKE) clean ) - ( $(CD) src/panel/$(arch); $(MAKE) clean ) - ( $(CD) src/pauxil/$(arch); $(MAKE) clean ) - ( $(CD) src/pfact/$(arch); $(MAKE) clean ) - ( $(CD) src/pgesv/$(arch); $(MAKE) clean ) # clean_tst : - ( $(CD) testing/matgen/$(arch); $(MAKE) clean ) - ( $(CD) testing/timer/$(arch); $(MAKE) clean ) - ( $(CD) testing/pmatgen/$(arch); $(MAKE) clean ) - ( $(CD) testing/ptimer/$(arch); $(MAKE) clean ) - ( $(CD) testing/ptest/$(arch); $(MAKE) clean ) #- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_make`' clean ) # ## clean_arch ########################################################## # clean_arch_src : - $(RM) -r src/auxil/$(arch) - $(RM) -r src/blas/$(arch) - $(RM) -r src/comm/$(arch) - $(RM) -r src/grid/$(arch) - $(RM) -r src/panel/$(arch) - $(RM) -r src/pauxil/$(arch) - $(RM) -r src/pfact/$(arch) - $(RM) -r src/pgesv/$(arch) # clean_arch_tst : - $(RM) -r testing/matgen/$(arch) - $(RM) -r testing/timer/$(arch) - $(RM) -r testing/pmatgen/$(arch) - $(RM) -r testing/ptimer/$(arch) - $(RM) -r testing/ptest/$(arch) #- SPMS_make_rm`' -r testing/test/$(arch) # ## 
clean_arch_all ###################################################### # clean_arch_all : - $(MAKE) -f Make.top clean_arch_src arch=$(arch) - $(MAKE) -f Make.top clean_arch_tst arch=$(arch) - $(RM) -r bin/$(arch) include/$(arch) lib/$(arch) # ## clean_guard ######################################################### # clean_guard_src : - ( $(CD) src/auxil/$(arch); $(RM) *.grd ) - ( $(CD) src/blas/$(arch); $(RM) *.grd ) - ( $(CD) src/comm/$(arch); $(RM) *.grd ) - ( $(CD) src/grid/$(arch); $(RM) *.grd ) - ( $(CD) src/panel/$(arch); $(RM) *.grd ) - ( $(CD) src/pauxil/$(arch); $(RM) *.grd ) - ( $(CD) src/pfact/$(arch); $(RM) *.grd ) - ( $(CD) src/pgesv/$(arch); $(RM) *.grd ) # clean_guard_tst : - ( $(CD) testing/matgen/$(arch); $(RM) *.grd ) - ( $(CD) testing/timer/$(arch); $(RM) *.grd ) - ( $(CD) testing/pmatgen/$(arch); $(RM) *.grd ) - ( $(CD) testing/ptimer/$(arch); $(RM) *.grd ) - ( $(CD) testing/ptest/$(arch); $(RM) *.grd ) #- ( SPMS_make_cd`' testing/test/$(arch); SPMS_make_rm`' *.grd ) # ## misc ################################################################ # leaf : - ( $(CD) $(le) ; $(MKDIR) $(arch) ) - ( $(CD) $(le)/$(arch) ; \ $(LN_S) $(TOPdir)/Make.$(arch) Make.inc ) # ######################################################################## hpcc-1.4.1/hpl/Makefile0000644000000000000000000001063411256503657011562 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. 
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # SHELL = /bin/sh # arch = UNKNOWN # ## Targets ############################################################# # all : install # # ###################################################################### # install : startup refresh build # startup : $(MAKE) -f Make.top startup_dir arch=$(arch) $(MAKE) -f Make.top startup_src arch=$(arch) $(MAKE) -f Make.top startup_tst arch=$(arch) $(MAKE) -f Make.top refresh_src arch=$(arch) $(MAKE) -f Make.top refresh_tst arch=$(arch) # refresh : $(MAKE) -f Make.top refresh_src arch=$(arch) $(MAKE) -f Make.top refresh_tst arch=$(arch) # build : $(MAKE) -f Make.top build_src arch=$(arch) $(MAKE) -f Make.top build_tst arch=$(arch) # clean : $(MAKE) -f Make.top clean_src arch=$(arch) $(MAKE) -f Make.top clean_tst arch=$(arch) # clean_arch : $(MAKE) -f Make.top clean_arch_src arch=$(arch) $(MAKE) -f Make.top clean_arch_tst arch=$(arch) # clean_arch_all : $(MAKE) -f Make.top clean_arch_all arch=$(arch) # clean_guard : $(MAKE) -f Make.top clean_guard_src arch=$(arch) $(MAKE) -f Make.top clean_guard_tst arch=$(arch) # # ###################################################################### hpcc-1.4.1/hpl/README0000644000000000000000000000242111256503657010775 00000000000000============================================================== High Performance Computing Linpack Benchmark (HPL) HPL 2.0 - September 10, 2008 ============================================================== HPL is a software package that solves a (random) dense linear system in double precision (64 bits) arithmetic on distributed-memory computers. It can thus be regarded as a portable as well as freely available implementation of the High Performance Computing Linpack Benchmark. The HPL software package requires the availibility on your system of an implementation of the Message Passing Interface MPI (1.1 compliant). 
An implementation of either the Basic Linear Algebra Subprograms BLAS or the Vector Signal Image Processing Library VSIPL is also needed. Machine-specific as well as generic implementations of MPI, the BLAS and VSIPL are available for a large variety of systems. Install: See the file INSTALL in this directory. Tuning: See the file TUNING in this directory. Bugs: Known problems and bugs with this release are documented in the file hpl/BUGS. Check out the website www.netlib.org/benchmark/hpl for the latest information. ============================================================== hpcc-1.4.1/hpl/TODO0000644000000000000000000000115411256503657010607 00000000000000============================================================== High Performance Computing Linpack Benchmark (HPL) HPL 2.0 - September 10, 2008 ============================================================== Done list in version 1.0b, December 15th, 2004 - Fixed problem with 32-bit integer overflow. Thanks to John Baron. Done list in version 1.0a, January 1st, 2004 - Added Row- or Column-major process mapping in data file - Fixed compilation error for gcc 3.3 in walltime. - Fixed building problems on the T3E; Thanks to Edward Anderson. ============================================================== hpcc-1.4.1/hpl/TUNING0000644000000000000000000004211211256503657011045 00000000000000============================================================== Performance Tuning and setting up the input data file HPL.dat Current as of release 2.0 - September 10, 2008 ============================================================== Check out the website www.netlib.org/benchmark/hpl for the latest information. After having built the executable hpl/bin/<arch>/xhpl, one may want to modify the input data file HPL.dat. This file should reside in the same directory as the executable hpl/bin/<arch>/xhpl. An example HPL.dat file is provided by default.
This file contains information about the problem sizes, machine configuration, and algorithm features to be used by the executable. It is 30 lines long. All the selected parameters will be printed in the output generated by the executable. At the end of this file, there are a couple of experimental guidelines that you may find useful. ============================================================== File HPL.dat (description): Line 1: (unused) Typically one would use this line for one's own use. For example, it could be used to summarize the content of the input file. By default this line reads: HPL Linpack benchmark input file Line 2: (unused) same as line 1. By default this line reads: Innovative Computing Laboratory, University of Tennessee Line 3: the user can choose where the output should be redirected to. In the case of a file, a name is necessary, and this is the line where one wants to specify it. Only the first name on this line is significant. By default, the line reads: HPL.out output file name (if any) This means that if one chooses to redirect the output to a file, the file will be called "HPL.out". The rest of the line is unused, and this space can be used to put some informative comment on the meaning of this line. Line 4: This line specifies where the output should go. The line is formatted: it must be a positive integer; the rest is insignificant. 3 choices are possible for the positive integer: 6 means that the output will go to the standard output, 7 means that the output will go to the standard error. Any other integer means that the output should be redirected to a file, whose name has been specified in the line above. This line by default reads: 6 device out (6=stdout,7=stderr,file) which means that the output generated by the executable should be redirected to the standard output. Line 5: This line specifies the number of problem sizes to be executed. This number should be less than or equal to 20.
The first integer is significant, the rest is ignored. If the line reads: 3 # of problems sizes (N) this means that the user is willing to run 3 problem sizes that will be specified in the next line. Line 6: This line specifies the problem sizes one wants to run. Assuming the line above started with 3, the first 3 positive integers are significant, the rest is ignored. For example: 3000 6000 10000 Ns means that one wants xhpl to run 3 (specified in line 5) pro- blem sizes, namely 3000, 6000 and 10000. Line 7: This line specifies the number of block sizes to be run. This number should be less than or equal to 20. The first integer is significant, the rest is ignored. If the line reads: 5 # of NBs this means that the user is willing to use 5 block sizes that will be specified in the next line. Line 8: This line specifies the block sizes one wants to run. Assuming the line above started with 5, the first 5 positive integers are significant, the rest is ignored. For example: 80 100 120 140 160 NBs means that one wants xhpl to use 5 (specified in line 7) block sizes, namely 80, 100, 120, 140 and 160. Line 9 specifies how the MPI processes should be mapped onto the nodes of your platform. There are currently two possible mappings, namely row- and column-major. This feature is main- ly useful when these nodes are themselves multi-processor computers. A row-major mapping is recommended. Line 10: This line specifies the number of process grids to be run. This number should be less than or equal to 20. The first integer is significant, the rest is ignored. If the line reads: 2 # of process grids (P x Q) this means that you are willing to try 2 process grid sizes that will be specified in the next line. Lines 11-12: These two lines specify the number of process rows and columns of each grid you want to run on. Assuming the line above (10) started with 2, the first 2 positive in- tegers of those two lines are significant, the rest is igno- red.
For example: 1 2 Ps 6 8 Qs means that one wants to run xhpl on 2 process grids (line 10), namely 1 by 6 and 2 by 8. Note: In this example, it is required then to start xhpl on at least 16 nodes (max of P_i x Q_i). The runs on the two grids will be consecutive. If one was starting xhpl on more than 16 nodes, say 52, only 6 would be used for the first grid (1x6) and then 16 (2x8) would be used for the second grid. The fact that you started the MPI job on 52 nodes, will not make HPL use all of them. In this example, only 16 would be used. If one wants to run xhpl with 52 processes one needs to specify a grid of 52 processes, for example the following lines would do the job: 4 2 Ps 13 8 Qs Line 13: This line specifies the threshold the residuals should be compared to. The residuals should be of order 1, but are in practice slightly less than this, typically 0.001. This line is made of a real number, the rest is insignifi- cant. For example: 16.0 threshold In practice, a value of 16.0 will cover most cases. For va- rious reasons, it is possible that some of the residuals be- come slightly larger, say for example 35.6. xhpl will flag those runs as failed, however they can be considered as cor- rect. A run can be considered as failed if the residual is a few orders of magnitude bigger than 1 for example 10^6 or mo- re. Note: if one was to specify a threshold of 0.0, all tests would be flagged as failed, even though the answer is likely to be correct. It is allowed to specify a negative value for this threshold, in which case the checks will be by-passed, no matter what the value is, as soon as it is negative. This feature allows one to save time when performing a lot of experi- ments, say for instance during the tuning phase. Example: -16.0 threshold The remaining lines allow one to specify algorithmic features. xhpl will run all possible combinations of those for each problem size, block size, process grid combination.
This is handy when one looks for an "optimal" set of parameters. To understand a little bit better, let us first say a few words about the algorithm implemented in HPL. Basically this is a right-looking version with row-partial pivoting. The panel factorization is matrix-matrix operation based and recursive, dividing the panel into NDIV subpanels at each step. This part of the panel factorization is denoted below by "recursive panel fact. (RFACT)". The recursion stops when the current panel is made of less than or equal to NBMIN columns. At that point, xhpl uses a matrix-vector operation based factorization denoted below by "PFACTs". Classic recursion would then use NDIV=2, NBMIN=1. There are essentially 3 numerically equivalent LU factorization algorithm variants (left-looking, Crout and right-looking). In HPL, one can choose every one of those for the RFACT, as well as the PFACT. The following lines of HPL.dat allow you to set those parameters. Lines 14-21: (Example 1) 3 # of panel fact 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) 4 # of recursive stopping criterium 1 2 4 8 NBMINs (>= 1) 3 # of panels in recursion 2 3 4 NDIVs 3 # of recursive panel fact. 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) This example would try all variants of PFACT, 4 values for NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 and 4, and all variants for RFACT. Lines 14-21: (Example 2) 2 # of panel fact 2 0 PFACTs (0=left, 1=Crout, 2=Right) 2 # of recursive stopping criterium 4 8 NBMINs (>= 1) 1 # of panels in recursion 2 NDIVs 1 # of recursive panel fact. 2 RFACTs (0=left, 1=Crout, 2=Right) This example would try 2 variants of PFACT namely right loo- king and left looking, 2 values for NBMIN, namely 4 and 8, 1 value for NDIV namely 2, and one variant for RFACT. In the main loop of the algorithm, the current panel of co- lumns is broadcast in process rows using a virtual ring to- pology. HPL offers various choices, and one most likely wants to use the increasing ring modified encoded as 1.
4 is also a good choice. Lines 22-23: (Example 1): 1 # of broadcast 1 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) This will cause HPL to broadcast the current panel using the increasing ring modified topology. Lines 22-23: (Example 2): 2 # of broadcast 0 4 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) This will cause HPL to broadcast the current panel using the increasing ring virtual topology and the long message algori- thm. Lines 24-25 allow one to specify the look-ahead depth used by HPL. A depth of 0 means that the next panel is factorized af- ter the update by the current panel is completely finished. A depth of 1 means that the next panel is factorized immediate- ly after being updated. The update by the current panel is then finished. A depth of k means that the next k panels are factorized immediately after being updated. The update by the current panel is then finished. It turns out that a depth of 1 seems to give the best results, but may need a large pro- blem size before one can see the performance gain. So use 1, if you do not know better, otherwise you may want to try 0. Look-ahead of depths 2 and larger will probably not give you better results. Lines 24-25: (Example 1): 1 # of lookahead depth 1 DEPTHs (>=0) This will cause HPL to use a look-ahead of depth 1. Lines 24-25: (Example 2): 2 # of lookahead depth 0 1 DEPTHs (>=0) This will cause HPL to use a look-ahead of depths 0 and 1. Lines 26-27 allow one to specify the swapping algorithm used by HPL for all tests. There are currently two swapping algo- rithms available, one based on "binary exchange" and the other one based on a "spread-roll" procedure (also called "long" below). For large problem sizes, this last one is like- ly to be more efficient. The user can also choose to mix both variants, that is "binary-exchange" for a number of columns less than a threshold value, and then the "spread-roll" al- gorithm. This threshold value is then specified on Line 27.
Lines 26-27: (Example 1): 1 SWAP (0=bin-exch,1=long,2=mix) 60 swapping threshold This will cause HPL to use the "long" or "spread-roll" swap- ping algorithm. Note that a threshold is specified in that example but not used by HPL. Lines 26-27: (Example 2): 2 SWAP (0=bin-exch,1=long,2=mix) 60 swapping threshold This will cause HPL to use the "long" or "spread-roll" swap- ping algorithm as soon as there are more than 60 columns in the row panel. Otherwise, the "binary-exchange" algorithm will be used instead. Line 28 allows one to specify whether the upper triangle of the panel of columns should be stored in no-transposed or transposed form. Example: 0 L1 in (0=transposed,1=no-transposed) form Line 29 allows one to specify whether the panel of rows U should be stored in no-transposed or transposed form. Example: 0 U in (0=transposed,1=no-transposed) form Line 30 enables/disables the equilibration phase. This option will not be used unless you selected 1 or 2 in Line 26. Ex: 1 Equilibration (0=no,1=yes) Line 31 allows one to specify the alignment in memory for the memory space allocated by HPL. On modern machines, one proba- bly wants to use 4, 8 or 16. This may result in a tiny amount of memory wasted. Example: 4 memory alignment in double (> 0) ============================================================== Guide lines: 1) Figure out a good block size for the matrix-matrix multiply routine. The best method is to try a few out. If you happen to know the block size used by the matrix-matrix multiply routine, a small multiple of that block size will do fine. HPL uses the block size NB for the data distribution as well as for the computational granularity. From a data distribution point of view, the smaller NB, the better the load balance. You definitely want to stay away from very large values of NB.
From a computation point of view, a too small value of NB may limit the computational performance by a large factor because almost no data reuse will occur in the highest level of the memory hierarchy. The number of messages will also increase. Efficient matrix-multiply routines are often internally blocked. Small multiples of this blocking factor are likely to be good block sizes for HPL. The bottom line is that "good" block sizes are almost always in the [32..256] interval. The best values depend on the computation / communication performance ratio of your system. To a much lesser extent, the problem size matters as well. Say for example, you empirically found that 44 was a good block size with respect to performance. 88 or 132 are likely to give slightly better results for large problem sizes because of a slightly higher flop rate. 2) The process mapping should not matter if the nodes of your platform are single processor computers. If these nodes are multi-processors, a row-major mapping is recommended. 3) HPL likes "square" or slightly flat process grids. Unless you are using a very small process grid, stay away from the 1-by-Q and P-by-1 process grids. 4) Panel factorization parameters: a good start are the fol- lowing for the lines 14-21: 1 # of panel fact 1 PFACTs (0=left, 1=Crout, 2=Right) 2 # of recursive stopping criterium 4 8 NBMINs (>= 1) 1 # of panels in recursion 2 NDIVs 1 # of recursive panel fact. 2 RFACTs (0=left, 1=Crout, 2=Right) 5) Broadcast parameters: at this time, it is far from obvious to me what the best setting is, so I would probably try them all. If I had to guess I would probably start with the follo- wing for the lines 22-23: 2 # of broadcast 1 3 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) The best broadcast depends on your problem size and hardware performance. My take is that 4 or 5 may be competitive for machines featuring very fast nodes compared to the network.
6) Look-ahead depth: as mentioned above 0 or 1 are likely to be the best choices. This also depends on the problem size and machine configuration, so I would try "no look-ahead (0)" and "look-ahead of depth 1 (1)". That is for lines 24-25: 2 # of lookahead depth 0 1 DEPTHs (>=0) 7) Swapping: one can select only one of the three algorithms in the input file. Theoretically, mix (2) should win, however long (1) might just be good enough. The difference should be small between those two assuming a swapping threshold of the order of the block size (NB) selected. If this threshold is very large, HPL will use bin_exch (0) most of the time and if it is very small (< NB) long (1) will always be used. In short and assuming the block size (NB) used is say 60, I would choose for the lines 26-27: 2 SWAP (0=bin-exch,1=long,2=mix) 60 swapping threshold I would also try the long variant. For a very small number of processes in every column of the process grid (say < 4), very little performance difference should be observable. 8) Local storage: I do not think Line 28 matters. Pick 0 if in doubt. Line 29 is more important. It controls how the panel of rows should be stored. No doubt 0 is better. The caveat is that in that case the matrix-multiply function is called with ( Notrans, Trans, ... ), that is C := C - A B^T. Unless the computational kernel you are using has a very poor (with respect to performance) implementation of that case, and is much more efficient with ( Notrans, Notrans, ... ) just pick 0 as well. So, my choice: 0 L1 in (0=transposed,1=no-transposed) form 0 U in (0=transposed,1=no-transposed) form 9) Equilibration: It is hard to tell whether equilibration should always be performed or not. Not knowing much about the random matrix generated and because the overhead is so small compared to the possible gain, I turn it on all the time. 1 Equilibration (0=no,1=yes) 10) For alignment, 4 should be plenty, but just to be safe, one may want to pick 8 instead.
8 memory alignment in double (> 0) ============================================================== hpcc-1.4.1/hpl/include/hpccmema.h0000644000000000000000000000116511256503657013472 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ #ifndef HPCCMEMA_H #define HPCCMEMA_H 1 #ifdef HPCC_MEMALLCTR extern int HPCC_alloc_init(size_t total_size); extern int HPCC_alloc_finalize(); extern void *HPCC_malloc(size_t size); extern void HPCC_free(void *ptr); #define HPCC_fftw_malloc HPCC_malloc #define HPCC_fftw_free HPCC_free #define HPCC_XMALLOC(t,s) ((t*)HPCC_malloc(sizeof(t)*(s))) #else #define HPCC_malloc malloc #define HPCC_free free #define HPCC_fftw_malloc fftw_malloc #define HPCC_fftw_free fftw_free #define HPCC_XMALLOC(t,s) XMALLOC(t,s) #endif #endif hpcc-1.4.1/hpl/include/hpl.h0000644000000000000000000001053611256503657012502 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_H #define HPL_H /* * --------------------------------------------------------------------- * HPL default compile options that can be overridden in the Make.<arch> file. * Each default below is selected only when the corresponding override * macro has not been defined by the build.
* --------------------------------------------------------------------- */ #ifndef HPL_NO_MPI_DATATYPE /* default: use MPI user-defined data types */ #define HPL_USE_MPI_DATATYPE #endif #ifndef HPL_COPY_L /* default: do not copy L, use MPI user-defined data types */ #define HPL_NO_COPY_L #endif #ifndef HPL_DETAILED_TIMING /* default: do not enable detailed timings */ #define HPL_NO_DETAILED_TIMING #endif #ifndef HPL_CALL_VSIPL /* default: call the Fortran 77 BLAS interface ... */ #ifndef HPL_CALL_CBLAS /* ... unless CBLAS was requested; there can be only one */ #define HPL_CALL_FBLAS #endif #endif /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" #include "hpl_auxil.h" #include "hpl_gesv.h" #include "hpl_pmisc.h" #include "hpl_pauxil.h" #include "hpl_panel.h" #include "hpl_pfact.h" #include "hpl_pgesv.h" #include "hpl_timer.h" #include "hpl_matgen.h" #include "hpl_test.h" #include "hpl_ptimer.h" #include "hpl_pmatgen.h" #include "hpl_ptest.h" #endif /* * End of hpl.h */ hpcc-1.4.1/hpl/include/hpl_auxil.h0000644000000000000000000001300311256503657013674 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_AUXIL_H #define HPL_AUXIL_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" /* * --------------------------------------------------------------------- * typedef definitions * --------------------------------------------------------------------- */ /* HPL_T_NORM: norm selector, first argument of HPL_dlange below */ typedef enum { HPL_NORM_A = 800, HPL_NORM_1 = 801, HPL_NORM_I = 802 } HPL_T_NORM; /* HPL_T_MACH: selects which machine parameter HPL_dlamch below returns */ typedef enum { HPL_MACH_EPS = 900, /* relative machine precision */ HPL_MACH_SFMIN = 901, /* safe minimum such that 1/sfmin does not overflow */ HPL_MACH_BASE = 902, /* base = base of the machine */ HPL_MACH_PREC = 903, /* prec = eps*base */ HPL_MACH_MLEN = 904, /* number of (base) digits in the mantissa */ HPL_MACH_RND = 905, /* 1.0 if rounding occurs in addition */ HPL_MACH_EMIN = 906, /* min exponent before (gradual) underflow */ HPL_MACH_RMIN = 907, /* underflow threshold base**(emin-1) */ HPL_MACH_EMAX = 908, /* largest exponent before overflow */ HPL_MACH_RMAX = 909 /* overflow threshold - (base**emax)*(1-eps) */ } HPL_T_MACH; /* * --------------------------------------------------------------------- * Function prototypes (STDC_ARGS is not defined in this header; * presumably it comes from hpl_misc.h -- confirm) * --------------------------------------------------------------------- */ void HPL_fprintf STDC_ARGS( ( FILE *, const char *, ... ) ); void HPL_warn STDC_ARGS( ( FILE *, int, const char *, const char *, ...
) ); void HPL_abort STDC_ARGS( ( int, const char *, const char *, ... ) ); void HPL_dlacpy STDC_ARGS( ( const int, const int, const double *, const int, double *, const int ) ); void HPL_dlatcpy STDC_ARGS( ( const int, const int, const double *, const int, double *, const int ) ); void HPL_dlaprnt STDC_ARGS( ( const int, const int, double *, const int, const int, const int, const char * ) ); double HPL_dlange STDC_ARGS( ( const HPL_T_NORM, const int, const int, const double *, const int ) ); double HPL_dlamch STDC_ARGS( ( const HPL_T_MACH ) ); #endif /* * End of hpl_auxil.h */ hpcc-1.4.1/hpl/include/hpl_blas.h0000644000000000000000000004674411256503657013515 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission.
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_BLAS_H #define HPL_BLAS_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" /* * --------------------------------------------------------------------- * enum definitions: option selectors taken by the HPL_d* BLAS * prototypes (storage order, transposition, triangle, diagonal, side). * When HPL_CALL_CBLAS is defined the CBLAS_* / Cblas* names are * #define'd to these tags and enumerators. * --------------------------------------------------------------------- */ enum HPL_ORDER { HplRowMajor = 101, HplColumnMajor = 102 }; enum HPL_TRANS { HplNoTrans = 111, HplTrans = 112, HplConjTrans = 113 }; enum HPL_UPLO { HplUpper = 121, HplLower = 122 }; enum HPL_DIAG { HplNonUnit = 131, HplUnit = 132 }; enum HPL_SIDE { HplLeft = 141, HplRight = 142 }; #ifdef HPL_CALL_CBLAS /* * --------------------------------------------------------------------- * The C interface of the BLAS is available ...
* --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define CBLAS_INDEX int #define CBLAS_ORDER HPL_ORDER #define CblasRowMajor HplRowMajor #define CblasColMajor HplColMajor #define CBLAS_TRANSPOSE HPL_TRANS #define CblasNoTrans HplNoTrans #define CblasTrans HplTrans #define CblasConjTrans HplConjTrans #define CBLAS_UPLO HPL_UPLO #define CblasUpper HplUpper #define CblasLower HplLower #define CBLAS_DIAG HPL_DIAG #define CblasNonUnit HplNonUnit #define CblasUnit HplUnit #define CBLAS_SIDE HPL_SIDE #define CblasLeft HplLeft #define CblasRight HplRight /* * --------------------------------------------------------------------- * CBLAS Function prototypes * --------------------------------------------------------------------- */ CBLAS_INDEX cblas_idamax STDC_ARGS( ( const int, const double *, const int ) ); void cblas_dswap STDC_ARGS( ( const int, double *, const int, double *, const int ) ); void cblas_dcopy STDC_ARGS( ( const int, const double *, const int, double *, const int ) ); void cblas_daxpy STDC_ARGS( ( const int, const double, const double *, const int, double *, const int ) ); void cblas_dscal STDC_ARGS( ( const int, const double, double *, const int ) ); void cblas_dgemv STDC_ARGS( ( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, const int, const int, const double, const double *, const int, const double *, const int, const double, double *, const int ) ); void cblas_dger STDC_ARGS( ( const enum CBLAS_ORDER, const int, const int, const double, const double *, const int, const double *, const int, double *, const int ) ); void cblas_dtrsv STDC_ARGS( ( const enum CBLAS_ORDER, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int, const double *, const int, double *, const int ) ); void cblas_dgemm STDC_ARGS( ( const enum CBLAS_ORDER, const enum CBLAS_TRANSPOSE, const enum CBLAS_TRANSPOSE, const int, const 
int, const int, const double, const double *, const int, const double *, const int, const double, double *, const int ) ); void cblas_dtrsm STDC_ARGS( ( const enum CBLAS_ORDER, const enum CBLAS_SIDE, const enum CBLAS_UPLO, const enum CBLAS_TRANSPOSE, const enum CBLAS_DIAG, const int, const int, const double, const double *, const int, double *, const int ) ); /* * --------------------------------------------------------------------- * HPL C BLAS macro definitions: each HPL_* BLAS name is an alias for * the cblas_* routine of the same operation * --------------------------------------------------------------------- */ #define HPL_dswap cblas_dswap #define HPL_dcopy cblas_dcopy #define HPL_daxpy cblas_daxpy #define HPL_dscal cblas_dscal #define HPL_idamax cblas_idamax #define HPL_dgemv cblas_dgemv #define HPL_dtrsv cblas_dtrsv #define HPL_dger cblas_dger #define HPL_dgemm cblas_dgemm #define HPL_dtrsm cblas_dtrsm #endif /* HPL_CALL_CBLAS */ #ifdef HPL_CALL_FBLAS /* * --------------------------------------------------------------------- * Use the Fortran 77 interface of the BLAS ... * --------------------------------------------------------------------- * Defaults: Add_, F77_INTEGER=int, StringSunStyle * (each default applies only when none of the alternatives is defined) * --------------------------------------------------------------------- */ #ifndef NoChange #ifndef UpCase #ifndef Add__ #ifndef Add_ #define Add_ #endif #endif #endif #endif #ifndef F77_INTEGER #define F77_INTEGER int #else /* F77_INTEGER was supplied by the build: flag that it is user-defined */ #define HPL_USE_F77_INTEGER_DEF #endif #ifndef StringCrayStyle #ifndef StringStructVal #ifndef StringStructPtr #ifndef StringSunStyle #define StringSunStyle #endif #endif #endif #endif /* * --------------------------------------------------------------------- * Fortran 77 <-> C interface * --------------------------------------------------------------------- * * These macros identify how Fortran routines will be called. * * Add_ : the Fortran compiler expects the name of C functions to be * in all lower case and to have an underscore postfixed to it (Suns, Intel * compilers expect this).
* * NoChange : the Fortran compiler expects the name of C functions to be * in all lower case (IBM RS6K compilers do this). * * UpCase : the Fortran compiler expects the name of C functions to be * in all upper case (Cray compilers expect this). * * Add__ : the Fortran compiler in use is f2c, a Fortran to C conver- * ter. */ #ifdef NoChange /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) dgemm(...) */ #define F77dswap dswap #define F77dscal dscal #define F77dcopy dcopy #define F77daxpy daxpy #define F77idamax idamax #define F77dgemv dgemv #define F77dtrsv dtrsv #define F77dger dger #define F77dgemm dgemm #define F77dtrsm dtrsm #endif #ifdef UpCase /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) DGEMM(...) */ #ifdef CRAY_BLAS /* with CRAY_BLAS the F77d* names map onto the S-prefixed routine names */ #define F77dswap SSWAP #define F77dscal SSCAL #define F77dcopy SCOPY #define F77daxpy SAXPY #define F77idamax ISAMAX #define F77dgemv SGEMV #define F77dtrsv STRSV #define F77dger SGER #define F77dgemm SGEMM #define F77dtrsm STRSM #else #define F77dswap DSWAP #define F77dscal DSCAL #define F77dcopy DCOPY #define F77daxpy DAXPY #define F77idamax IDAMAX #define F77dgemv DGEMV #define F77dtrsv DTRSV #define F77dger DGER #define F77dgemm DGEMM #define F77dtrsm DTRSM #endif #endif #ifdef Add_ /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) dgemm_(...)
*/ #define F77dswap dswap_ #define F77dscal dscal_ #define F77dcopy dcopy_ #define F77daxpy daxpy_ #define F77idamax idamax_ #define F77dgemv dgemv_ #define F77dtrsv dtrsv_ #define F77dger dger_ #define F77dgemm dgemm_ #define F77dtrsm dtrsm_ #endif #ifdef Add__ /* * These defines set up the naming scheme required to have a FORTRAN * routine called by a C routine with the following FORTRAN to C inter- * face: * * FORTRAN DECLARATION C CALL * SUBROUTINE DGEMM(...) dgemm_(...) */ #define F77dswap dswap_ #define F77dscal dscal_ #define F77dcopy dcopy_ #define F77daxpy daxpy_ #define F77idamax idamax_ #define F77dgemv dgemv_ #define F77dtrsv dtrsv_ #define F77dger dger_ #define F77dgemm dgemm_ #define F77dtrsm dtrsm_ #endif /* * --------------------------------------------------------------------- * Typedef definitions and conversion utilities * --------------------------------------------------------------------- */ #ifdef StringCrayStyle #include /* Type of character argument in a FORTRAN call */ #define F77_CHAR _fcd /* Character conversion utilities */ #define HPL_F2C_CHAR(c) (*(_fcdtocp(c) )) #define HPL_C2F_CHAR(c) (_cptofcd(&(c), 1)) #define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ #endif /* ------------------------------------------------------------------ */ #ifdef StringStructVal /* Type of character argument in a FORTRAN call */ typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; /* Character conversion utilities */ #define HPL_F2C_CHAR(c) (*(c.cp)) #define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ #endif /* ------------------------------------------------------------------ */ #ifdef StringStructPtr /* Type of character argument in a FORTRAN call */ typedef struct { char *cp; F77_INTEGER len; } F77_CHAR; /* Character conversion utilities */ #define HPL_F2C_CHAR(c) (*(c->cp)) #define F77_CHAR_DECL F77_CHAR * /* input CHARACTER*1 */ #endif /* ------------------------------------------------------------------ */ #ifdef StringSunStyle /* Type of 
character argument in a FORTRAN call */ #define F77_CHAR char * /* Character conversion utilities */ #define HPL_F2C_CHAR(c) (*(c)) #define HPL_C2F_CHAR(c) (&(c)) #define F77_CHAR_DECL F77_CHAR /* input CHARACTER*1 */ #define F77_1_CHAR , F77_INTEGER #define F77_2_CHAR F77_1_CHAR F77_1_CHAR #define F77_3_CHAR F77_2_CHAR F77_1_CHAR #define F77_4_CHAR F77_3_CHAR F77_1_CHAR #endif /* ------------------------------------------------------------------ */ #ifndef F77_1_CHAR #define F77_1_CHAR #define F77_2_CHAR #define F77_3_CHAR #define F77_4_CHAR #endif #define F77_INT_DECL const F77_INTEGER * /* input integer */ #define F77_SIN_DECL const double * /* input scalar */ #define F77_VIN_DECL const double * /* input vector */ #define F77_VINOUT_DECL double * /* input/output matrix */ #define F77_MIN_DECL const double * /* input matrix */ #define F77_MINOUT_DECL double * /* input/output matrix */ #ifdef CRAY_PVP_ENV /* Type of FORTRAN functions */ #define F77_VOID_FUN extern fortran void /* subroutine */ #define F77_INT_FUN extern fortran int /* integer function */ #else #define F77_VOID_FUN extern void /* subroutine */ #define F77_INT_FUN extern int /* integer function */ #endif /* * --------------------------------------------------------------------- * Fortran 77 BLAS function prototypes * --------------------------------------------------------------------- */ F77_VOID_FUN F77dswap STDC_ARGS( ( F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); F77_VOID_FUN F77dscal STDC_ARGS( ( F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); F77_VOID_FUN F77dcopy STDC_ARGS( ( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); F77_VOID_FUN F77daxpy STDC_ARGS( ( F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL ) ); F77_INT_FUN F77idamax STDC_ARGS( ( F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL ) ); F77_VOID_FUN F77dgemv STDC_ARGS( ( F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, 
F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_1_CHAR ) ); F77_VOID_FUN F77dger STDC_ARGS( ( F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_VIN_DECL, F77_INT_DECL, F77_VIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL ) ); F77_VOID_FUN F77dtrsv STDC_ARGS( ( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_MIN_DECL, F77_INT_DECL, F77_VINOUT_DECL, F77_INT_DECL F77_3_CHAR ) ); F77_VOID_FUN F77dgemm STDC_ARGS( ( F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, F77_MIN_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_2_CHAR ) ); F77_VOID_FUN F77dtrsm STDC_ARGS( ( F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_CHAR_DECL, F77_INT_DECL, F77_INT_DECL, F77_SIN_DECL, F77_MIN_DECL, F77_INT_DECL, F77_MINOUT_DECL, F77_INT_DECL F77_4_CHAR ) ); #endif /* * --------------------------------------------------------------------- * HPL BLAS Function prototypes * --------------------------------------------------------------------- */ #ifndef HPL_CALL_CBLAS int HPL_idamax STDC_ARGS( ( const int, const double *, const int ) ); void HPL_daxpy STDC_ARGS( ( const int, const double, const double *, const int, double *, const int ) ); void HPL_dcopy STDC_ARGS( ( const int, const double *, const int, double *, const int ) ); void HPL_dscal STDC_ARGS( ( const int, const double, double *, const int ) ); void HPL_dswap STDC_ARGS( ( const int, double *, const int, double *, const int ) ); void HPL_dgemv STDC_ARGS( ( const enum HPL_ORDER, const enum HPL_TRANS, const int, const int, const double, const double *, const int, const double *, const int, const double, double *, const int ) ); void HPL_dger STDC_ARGS( ( const enum HPL_ORDER, const int, const int, const double, const double *, const int, double *, const int, double *, const int ) ); void HPL_dtrsv STDC_ARGS( ( const enum HPL_ORDER, const enum HPL_UPLO, const enum 
HPL_TRANS, const enum HPL_DIAG, const int, const double *, const int, double *, const int ) ); void HPL_dgemm STDC_ARGS( ( const enum HPL_ORDER, const enum HPL_TRANS, const enum HPL_TRANS, const int, const int, const int, const double, const double *, const int, const double *, const int, const double, double *, const int ) ); void HPL_dtrsm STDC_ARGS( ( const enum HPL_ORDER, const enum HPL_SIDE, const enum HPL_UPLO, const enum HPL_TRANS, const enum HPL_DIAG, const int, const int, const double, const double *, const int, double *, const int ) ); #endif #endif /* * hpl_blas.h */ hpcc-1.4.1/hpl/include/hpl_comm.h0000644000000000000000000001442111256503657013512 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
*
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef HPL_COMM_H
#define HPL_COMM_H
/*
 * hpl_comm.h declares the panel-broadcast topology selector, three
 * status codes, and the prototypes of the communication routines.
 * Nothing is defined here; every routine is implemented elsewhere.
 */
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_pmisc.h"
#include "hpl_panel.h"
/*
 * ---------------------------------------------------------------------
 * #typedefs and data structures
 * ---------------------------------------------------------------------
 */
/*
 * HPL_T_TOP enumerates six broadcast variants with explicit values
 * 401..406: three schemes (1-ring, 2-ring, long), each in a plain and a
 * "modified" flavour.
 * NOTE(review): the six HPL_b{init,cast,wait}_* triples declared below
 * presumably correspond one-to-one to these values (1ring/1rinM/2ring/
 * 2rinM/blong/blonM) -- confirm against the dispatching code.
 */
typedef enum
{
   HPL_1RING   = 401,                        /* Increasing ring */
   HPL_1RING_M = 402,             /* Increasing ring (modified) */
   HPL_2RING   = 403,                      /* Increasing 2-ring */
   HPL_2RING_M = 404,           /* Increasing 2-ring (modified) */
   HPL_BLONG   = 405,                         /* long broadcast */
   HPL_BLONG_M = 406               /* long broadcast (modified) */
} HPL_T_TOP;
/*
 * ---------------------------------------------------------------------
 * #define macro constants
 * ---------------------------------------------------------------------
 */
/*
 * Status codes. Note the polarity: failure is 0 and success is 1, the
 * opposite of the usual "0 means OK" C convention, so these values must
 * not be mixed with MPI or errno-style return codes.
 * NOTE(review): presumably these are the values returned by the int
 * routines declared below -- confirm in their definitions.
 */
#define    HPL_FAILURE            0
#define    HPL_SUCCESS            1
#define    HPL_KEEP_TESTING       2
/*
 * ---------------------------------------------------------------------
 * comm function prototypes
 * ---------------------------------------------------------------------
 */
/*
 * The parameter lists are wrapped in STDC_ARGS, which is not defined in
 * this header. Parameters are unnamed, so their meaning cannot be read
 * from the prototypes alone.
 *
 * Point-to-point helpers operating on a double buffer and a communicator.
 * NOTE(review): the int arguments are presumably count / peer rank /
 * message tag -- confirm against the definitions.
 */
int                              HPL_send
STDC_ARGS( (
   double *,
   int,
   int,
   int,
   MPI_Comm
) );
int                              HPL_recv
STDC_ARGS( (
   double *,
   int,
   int,
   int,
   MPI_Comm
) );
int                              HPL_sdrv
STDC_ARGS( (
   double *,
   int,
   int,
   double *,
   int,
   int,
   int,
   MPI_Comm
) );
/*
 * Panel broadcast entry points: an init / bcast / wait triple operating
 * on an HPL_T_panel. HPL_bcast additionally takes an int * argument.
 */
int                              HPL_binit
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait
STDC_ARGS( (
   HPL_T_panel *
) );
/*
 * Panel helpers: HPL_packL returns int and takes three const int
 * arguments besides the panel; HPL_copyL returns nothing.
 */
int                              HPL_packL
STDC_ARGS( (
   HPL_T_panel *,
   const int,
   const int,
   const int
) );
void                             HPL_copyL
STDC_ARGS( (
   HPL_T_panel *
) );
/*
 * Per-variant init / bcast / wait triples. All six triples share the
 * same signatures as HPL_binit / HPL_bcast / HPL_bwait above.
 */
int                              HPL_binit_1ring
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_1ring
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_1ring
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_binit_1rinM
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_1rinM
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_1rinM
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_binit_2ring
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_2ring
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_2ring
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_binit_2rinM
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_2rinM
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_2rinM
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_binit_blong
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_blong
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_blong
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_binit_blonM
STDC_ARGS( (
   HPL_T_panel *
) );
int                              HPL_bcast_blonM
STDC_ARGS( (
   HPL_T_panel *,
   int *
) );
int                              HPL_bwait_blonM
STDC_ARGS( (
   HPL_T_panel *
) );
#endif
/*
 * End of hpl_comm.h
 */
hpcc-1.4.1/hpl/include/hpl_gesv.h0000644000000000000000000001063111256503657013522 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef HPL_GESV_H
#define HPL_GESV_H
/*
 * hpl_gesv.h declares the LU factorization variant selector HPL_T_FACT
 * and the prototypes of HPL_dgesv and HPL_ipid. Nothing is defined here.
 */
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_misc.h"
#include "hpl_blas.h"
#include "hpl_auxil.h"
/*
 * ---------------------------------------------------------------------
 * #typedefs and data structures
 * ---------------------------------------------------------------------
 */
/*
 * HPL_T_FACT selects one of three LU factorization variants; the
 * enumerators carry explicit values 301..303.
 */
typedef enum
{
   HPL_LEFT_LOOKING  = 301,           /* Left looking lu fact variant */
   HPL_CROUT         = 302,                  /* Crout lu fact variant */
   HPL_RIGHT_LOOKING = 303           /* Right looking lu fact variant */
} HPL_T_FACT;
/*
 * ---------------------------------------------------------------------
 * Function prototypes
 * ---------------------------------------------------------------------
 */
/*
 * HPL_dgesv takes two HPL_T_FACT selectors, a double array and an int
 * array among its nine unnamed parameters.
 * NOTE(review): parameter meanings (sizes, blocking factors, leading
 * dimension, pivot vector) are not visible from this prototype --
 * confirm against the definition before relying on argument order.
 */
void                             HPL_dgesv
STDC_ARGS( (
   const int,
   const int,
   const int,
   const HPL_T_FACT,
   const HPL_T_FACT,
   const int,
   double *,
   const int,
   int *
) );
/*
 * HPL_ipid takes one double array, six int arrays and six const int
 * scalars; all are unnamed here.
 * NOTE(review): which arrays are inputs and which are outputs cannot be
 * told from the prototype (none of the pointers is const-qualified).
 */
void                             HPL_ipid
STDC_ARGS( (
   const int,
   double *,
   int *,
   int *,
   int *,
   int *,
   int *,
   int *,
   const int,
   const int,
   const int,
   const int,
   const int
) );
#endif
/*
 * End of hpl_gesv.h
 */
hpcc-1.4.1/hpl/include/hpl_grid.h0000644000000000000000000002025311256503657013504 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_GRID_H #define HPL_GRID_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_pmisc.h" /* * --------------------------------------------------------------------- * #typedefs and data structures * --------------------------------------------------------------------- */ typedef enum { HPL_INT = 100, HPL_DOUBLE = 101 } HPL_T_TYPE; typedef enum { HPL_ROW_MAJOR = 201, HPL_COLUMN_MAJOR = 202 } HPL_T_ORDER; typedef struct HPL_S_grid { MPI_Comm all_comm; /* grid communicator */ MPI_Comm row_comm; /* row communicator */ MPI_Comm col_comm; /* column communicator */ HPL_T_ORDER order; /* ordering of the procs in the grid */ int iam; /* my rank in the grid */ int myrow; /* my row number in the grid */ int mycol; /* my column number in the grid */ int nprow; /* the total # of rows in the grid */ int npcol; /* the total # of columns in the grid */ int nprocs; /* the total # of procs in the grid */ int row_ip2; /* largest power of two <= nprow */ int row_hdim; /* row_ip2 procs hypercube dimension */ int row_ip2m1; /* largest power of two <= nprow-1 */ int row_mask; /* row_ip2m1 procs hypercube mask */ int col_ip2; /* largest power of two <= npcol */ int col_hdim; /* col_ip2 procs hypercube dimension */ int col_ip2m1; /* largest power of two <= npcol-1 */ int col_mask; /* col_ip2m1 procs hypercube mask */ } HPL_T_grid; /* * --------------------------------------------------------------------- * Data Structures * --------------------------------------------------------------------- */ typedef void (*HPL_T_OP) ( const int, const void *, void *, const HPL_T_TYPE ); /* * --------------------------------------------------------------------- * #define macros definitions * --------------------------------------------------------------------- */ #define HPL_2_MPI_TYPE( typ ) \ ( ( typ == HPL_INT ? 
MPI_INT : MPI_DOUBLE ) ) /* * The following macros perform common modulo operations; All functions * except MPosMod assume arguments are < d (i.e., arguments are themsel- * ves within modulo range). */ /* increment with mod */ #define MModInc(I, d) if(++(I) == (d)) (I) = 0 /* decrement with mod */ #define MModDec(I, d) if(--(I) == -1) (I) = (d)-1 /* positive modulo */ #define MPosMod(I, d) ( (I) - ((I)/(d))*(d) ) /* add two numbers */ #define MModAdd(I1, I2, d) \ ( ( (I1) + (I2) < (d) ) ? (I1) + (I2) : (I1) + (I2) - (d) ) /* add 1 to # */ #define MModAdd1(I, d) ( ((I) != (d)-1) ? (I) + 1 : 0 ) /* subtract two numbers */ #define MModSub(I1, I2, d) \ ( ( (I1) < (I2) ) ? (d) + (I1) - (I2) : (I1) - (I2) ) /* sub 1 from # */ #define MModSub1(I, d) ( ((I)!=0) ? (I)-1 : (d)-1 ) /* * --------------------------------------------------------------------- * grid function prototypes * --------------------------------------------------------------------- */ int HPL_grid_init STDC_ARGS( ( MPI_Comm, const HPL_T_ORDER, const int, const int, HPL_T_grid * ) ); int HPL_grid_exit STDC_ARGS( ( HPL_T_grid * ) ); int HPL_grid_info STDC_ARGS( ( const HPL_T_grid *, int *, int *, int *, int * ) ); int HPL_pnum STDC_ARGS( ( const HPL_T_grid *, const int, const int ) ); int HPL_barrier STDC_ARGS( ( MPI_Comm ) ); int HPL_broadcast STDC_ARGS( ( void *, const int, const HPL_T_TYPE, const int, MPI_Comm ) ); int HPL_reduce STDC_ARGS( ( void *, const int, const HPL_T_TYPE, const HPL_T_OP , const int, MPI_Comm ) ); int HPL_all_reduce STDC_ARGS( ( void *, const int, const HPL_T_TYPE, const HPL_T_OP , MPI_Comm ) ); void HPL_max STDC_ARGS( ( const int, const void *, void *, const HPL_T_TYPE ) ); void HPL_min STDC_ARGS( ( const int, const void *, void *, const HPL_T_TYPE ) ); void HPL_sum STDC_ARGS( ( const int, const void *, void *, const HPL_T_TYPE ) ); #endif /* * End of hpl_grid.h */ hpcc-1.4.1/hpl/include/hpl_matgen.h0000644000000000000000000001122411256503657014030 00000000000000/* * -- High 
Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#ifndef HPL_MATGEN_H
#define HPL_MATGEN_H
/*
 * hpl_matgen.h declares the numeric constants and the prototypes of the
 * matrix / random number generation routines. Nothing but constants is
 * defined here.
 */
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_misc.h"
#include "hpl_blas.h"
#include "hpl_auxil.h"
/*
 * ---------------------------------------------------------------------
 * #define macro constants
 * ---------------------------------------------------------------------
 */
/*
 * HPL_MULT0 and HPL_MULT1 both fit in a signed 32-bit int.
 * NOTE(review): by their names these look like the two words of a
 * multiplier (MULT) and an additive term (IADD) for the generator behind
 * HPL_rand / HPL_lmul / HPL_ladd -- confirm in the matgen sources.
 */
#define    HPL_MULT0         1284865837
#define    HPL_MULT1         1481765933
#define    HPL_IADD0         1
#define    HPL_IADD1         0
/* 2147483648.0 is 2^31 and 65536.0 is 2^16, both as double constants. */
#define    HPL_DIVFAC        2147483648.0
#define    HPL_POW16         65536.0
#define    HPL_HALF          0.5
/*
 * ---------------------------------------------------------------------
 * Function prototypes
 * ---------------------------------------------------------------------
 */
/*
 * Parameter lists are wrapped in STDC_ARGS and parameters are unnamed.
 * NOTE(review): for HPL_dmatgen the arguments are presumably
 * (rows, cols, array, leading dimension, seed) -- confirm against the
 * definition.
 */
void                             HPL_dmatgen
STDC_ARGS( (
   const int,
   const int,
   double *,
   const int,
   const int
) );
/*
 * HPL_lmul and HPL_ladd each take three int * arguments; none is
 * const-qualified, so inputs and outputs cannot be told apart here.
 */
void                             HPL_lmul
STDC_ARGS( (
   int *,
   int *,
   int *
) );
void                             HPL_ladd
STDC_ARGS( (
   int *,
   int *,
   int *
) );
void                             HPL_xjumpm
STDC_ARGS( (
   const int,
   int *,
   int *,
   int *,
   int *,
   int *,
   int *
) );
void                             HPL_setran
STDC_ARGS( (
   const int,
   int *
) );
void                             HPL_jumpit
STDC_ARGS( (
   int *,
   int *,
   int *,
   int *
) );
/* HPL_rand takes no arguments and returns a double. */
double                           HPL_rand STDC_ARGS( ( void ) );

#endif
/*
 * End of hpl_matgen.h
 */
hpcc-1.4.1/hpl/include/hpl_misc.h0000644000000000000000000001164611256503657013520 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_MISC_H #define HPL_MISC_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #ifdef __STDC__ #define HPL_STDC_HEADERS #endif #include #include #include #ifdef HPL_STDC_HEADERS #include #define STDC_ARGS(p) p #else #include #define STDC_ARGS(p) () #endif #ifdef HPL_CALL_VSIPL #include #endif /* * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define HPL_rone 1.0 #define HPL_rtwo 2.0 #define HPL_rzero 0.0 /* * --------------------------------------------------------------------- * #define macros definitions * --------------------------------------------------------------------- */ #define Mabs( a_ ) ( ( (a_) < 0 ) ? -(a_) : (a_) ) #define Mmin( a_, b_ ) ( ( (a_) < (b_) ) ? (a_) : (b_) ) #define Mmax( a_, b_ ) ( ( (a_) > (b_) ) ? (a_) : (b_) ) #define Mfloor(a,b) (((a)>0) ? (((a)/(b))) : (-(((-(a))+(b)-1)/(b)))) #define Mceil(a,b) ( ( (a)+(b)-1 ) / (b) ) #define Miceil(a,b) (((a)>0) ? ((((a)+(b)-1)/(b))) : (-((-(a))/(b)))) #define Mupcase(C) (((C)>96 && (C)<123) ? (C) & 0xDF : (C)) #define Mlowcase(C) (((C)>64 && (C)< 91) ? (C) | 32 : (C)) /* * Mptr returns a pointer to a_( i_, j_ ) for readability reasons and * also less silly errors ... */ #define Mptr( a_, i_, j_, lda_ ) \ ( (a_) + (size_t)(i_) + (size_t)(j_)*(size_t)(lda_) ) /* * Align pointer */ #define HPL_PTR( ptr_, al_ ) \ ( ( ( (size_t)(ptr_)+(al_)-1 ) / (al_) ) * (al_) ) #endif /* * End of hpl_misc.h */ hpcc-1.4.1/hpl/include/hpl_panel.h0000644000000000000000000001571711256503657013667 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef HPL_PANEL_H
#define HPL_PANEL_H
/*
 * hpl_panel.h declares the panel descriptor HPL_T_panel and the
 * prototypes of the routines that create, initialize and release it.
 */
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_pmisc.h"
#include "hpl_grid.h"
/*
 * ---------------------------------------------------------------------
 * Data Structures
 * ---------------------------------------------------------------------
 */
/*
 * Panel descriptor. It holds pointers back to the process grid, the
 * algorithm parameters and the local matrix description, pointers into
 * the panel's work areas, the MPI objects used by the panel broadcast,
 * and the global / local index bookkeeping for the trailing part of A.
 * struct HPL_S_palg and struct HPL_S_pmat are only referenced through
 * pointers here; their definitions are not in this header.
 */
typedef struct HPL_S_panel
{
   struct HPL_S_grid   * grid;             /* ptr to the process grid */
   struct HPL_S_palg   * algo;          /* ptr to the algo parameters */
   struct HPL_S_pmat   * pmat;         /* ptr to the local array info */
   double              * A;              /* ptr to trailing part of A */
   double              * WORK;                          /* work space */
   double              * L2;                              /* ptr to L */
   double              * L1;       /* ptr to jb x jb upper block of A */
   double              * DPIV;    /* ptr to replicated jb pivot array */
   double              * DINFO;      /* ptr to replicated scalar info */
   double              * U;                               /* ptr to U */
   int                 * IWORK;     /* integer workspace for swapping */
   void                * * * buffers[2];   /* buffers for panel bcast */
   int                 counts [2];          /* counts for panel bcast */
   MPI_Datatype        dtypes [2];      /* data types for panel bcast */
   MPI_Request         request[1];        /* requests for panel bcast */
   MPI_Status          status [1];          /* status for panel bcast */
   int                 nb;            /* distribution blocking factor */
   int                 jb;                             /* panel width */
   int                 m;   /* global # of rows of trailing part of A */
   int                 n;   /* global # of cols of trailing part of A */
   int                 ia;  /* global row index of trailing part of A */
   int                 ja;  /* global col index of trailing part of A */
   int                 mp;   /* local # of rows of trailing part of A */
   int                 nq;   /* local # of cols of trailing part of A */
   int                 ii;   /* local row index of trailing part of A */
   int                 jj;   /* local col index of trailing part of A */
   int                 lda;           /* local leading dim of array A */
   int                 prow;  /* proc. row owning 1st row of trail. A */
   int                 pcol;  /* proc. col owning 1st col of trail. A */
   int                 msgid;           /* message id for panel bcast */
   int                 ldl2;         /* local leading dim of array L2 */
   int                 len;      /* length of the buffer to broadcast */
/* The four block handles below exist only in HPL_CALL_VSIPL builds. */
#ifdef HPL_CALL_VSIPL
   vsip_block_d        * Ablock;                           /* A block */
   vsip_block_d        * L1block;                         /* L1 block */
   vsip_block_d        * L2block;                         /* L2 block */
   vsip_block_d        * Ublock;                           /* U block */
#endif
} HPL_T_panel;

/*
 * ---------------------------------------------------------------------
 * panel function prototypes
 * ---------------------------------------------------------------------
 */
/*
 * NOTE(review): hpl_pgesv.h is included here, after HPL_T_panel is
 * complete, rather than with the other includes at the top; presumably
 * it supplies HPL_T_palg / HPL_T_pmat for the prototypes below and
 * itself needs HPL_T_panel -- confirm before moving this line.
 */
#include "hpl_pgesv.h"

/*
 * HPL_pdpanel_new and HPL_pdpanel_init take identical leading arguments
 * and differ only in the last one: _new takes HPL_T_panel * * while
 * _init takes HPL_T_panel *.
 * NOTE(review): this suggests _new allocates the descriptor and _init
 * fills a caller-provided one -- confirm against the definitions.
 */
void                             HPL_pdpanel_new
STDC_ARGS( (
   HPL_T_grid *,
   HPL_T_palg *,
   const int,
   const int,
   const int,
   HPL_T_pmat *,
   const int,
   const int,
   const int,
   HPL_T_panel * *
) );
void                             HPL_pdpanel_init
STDC_ARGS( (
   HPL_T_grid *,
   HPL_T_palg *,
   const int,
   const int,
   const int,
   HPL_T_pmat *,
   const int,
   const int,
   const int,
   HPL_T_panel *
) );
/*
 * Release counterparts: _disp takes HPL_T_panel * * and _free takes
 * HPL_T_panel *; both return int.
 */
int                              HPL_pdpanel_disp
STDC_ARGS( (
   HPL_T_panel * *
) );
int                              HPL_pdpanel_free
STDC_ARGS( (
   HPL_T_panel *
) );

#endif
/*
 * End of hpl_panel.h
 */
hpcc-1.4.1/hpl/include/hpl_pauxil.h0000644000000000000000000004620211256503657014063 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_PAUXIL_H #define HPL_PAUXIL_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" #include "hpl_auxil.h" #include "hpl_pmisc.h" #include "hpl_grid.h" /* * --------------------------------------------------------------------- * #define macros definitions * --------------------------------------------------------------------- */ /* * Mindxg2p returns the process coordinate owning the entry globally in- * dexed by ig_. proc_ is set to src_ when ig_ < inb_, src_ is negative or * nprocs_ is not greater than 1; otherwise it is set to * src_ + 1 + (ig_-inb_)/nb_ reduced modulo nprocs_. * * NOTE: like the other macros below, the expansion is a braced block that * evaluates its arguments more than once, so arguments should be free of * side effects.
*/ #define Mindxg2p( ig_, inb_, nb_, proc_, src_, nprocs_ ) \ { \ if( ( (ig_) >= (inb_) ) && ( (src_) >= 0 ) && \ ( (nprocs_) > 1 ) ) \ { \ proc_ = (src_) + 1 + ( (ig_)-(inb_) ) / (nb_); \ proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ } \ else \ { \ proc_ = (src_); \ } \ } /* * Mindxg2l computes in il_ the local index associated with the global * index ig_: il_ is ig_ itself when ig_ < inb_, src_ is -1 or nprocs_ is * 1, and is otherwise derived from the block number (ig_-inb_)/nb_. The * argument proc_ is not referenced by the expansion. */ #define Mindxg2l( il_, ig_, inb_, nb_, proc_, src_, nprocs_ ) \ { \ if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ ( (nprocs_) == 1 ) ) { il_ = (ig_); } \ else \ { \ int i__, j__; \ j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ il_ = (nb_)*( j__ - i__ ) + \ ( (i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ (ig_) - (inb_) : (ig_) ); \ } \ } /* * Mindxg2lp sets both il_ (same formula as Mindxg2l) and proc_. In the * shortcut case (ig_ < inb_, src_ is -1 or nprocs_ is 1) proc_ is src_; * otherwise proc_ is src_ + 1 + (ig_-inb_)/nb_ reduced modulo nprocs_. */ #define Mindxg2lp( il_, proc_, ig_, inb_, nb_, src_, nprocs_ ) \ { \ if( ( (ig_) < (inb_) ) || ( (src_) == -1 ) || \ ( (nprocs_) == 1 ) ) \ { il_ = (ig_); proc_ = (src_); } \ else \ { \ int i__, j__; \ j__ = ( i__ = ( (ig_)-(inb_) ) / (nb_) ) / (nprocs_); \ il_ = (nb_)*(j__-i__) + \ ( ( i__ + 1 - ( j__ + 1 ) * (nprocs_) ) ? \ (ig_) - (inb_) : (ig_) ); \ proc_ = (src_) + 1 + i__; \ proc_ -= ( proc_ / (nprocs_) ) * (nprocs_); \ } \ } /* * Mindxl2g computes the global index ig_ corresponding to the local * index il_ in process proc_.
*/ #define Mindxl2g( ig_, il_, inb_, nb_, proc_, src_, nprocs_ ) \ { \ if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ if( (proc_) == (src_) ) \ { \ if( (il_) < (inb_) ) ig_ = (il_); \ else ig_ = (il_) + \ (nb_)*((nprocs_)-1)*(((il_)-(inb_))/(nb_) + 1); \ } \ else if( (proc_) < (src_) ) \ { \ ig_ = (il_) + (inb_) + \ (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ (proc_)-(src_)-1+(nprocs_) ); \ } \ else \ { \ ig_ = (il_) + (inb_) + \ (nb_)*( ((nprocs_)-1)*((il_)/(nb_)) + \ (proc_)-(src_)-1 ); \ } \ } \ else \ { \ ig_ = (il_); \ } \ } /* * MnumrocI computes the # of local indexes np_ residing in the process * of coordinate proc_ corresponding to the interval of global indexes * i_:i_+n_-1 assuming that the global index 0 resides in the process * src_, and that the indexes are distributed from src_ using the para- * meters inb_, nb_ and nprocs_. When src_ is negative or nprocs_ is not * greater than 1, np_ is simply n_. The expansion is a braced block that * declares its own temporaries and evaluates its arguments several times. */ #define MnumrocI( np_, n_, i_, inb_, nb_, proc_, src_, nprocs_ ) \ { \ if( ( (src_) >= 0 ) && ( (nprocs_) > 1 ) ) \ { \ int inb__, mydist__, n__, nblk__, quot__, src__; \ if( ( inb__ = (inb_) - (i_) ) <= 0 ) \ { \ nblk__ = (-inb__) / (nb_) + 1; \ src__ = (src_) + nblk__; \ src__ -= ( src__ / (nprocs_) ) * (nprocs_); \ inb__ += nblk__*(nb_); \ if( ( n__ = (n_) - inb__ ) <= 0 ) \ { \ if( (proc_) == src__ ) np_ = (n_); \ else np_ = 0; \ } \ else \ { \ if( ( mydist__ = (proc_) - src__ ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ / (nb_) + 1; \ mydist__ -= nblk__ - \ (quot__ = (nblk__ / (nprocs_))) * (nprocs_); \ if( mydist__ < 0 ) \ { \ if( (proc_) != src__ ) \ np_ = (nb_) + (nb_) * quot__; \ else \ np_ = inb__ + (nb_) * quot__; \ } \ else if( mydist__ > 0 ) \ { \ np_ = (nb_) * quot__; \ } \ else \ { \ if( (proc_) != src__ ) \ np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ else \ np_ = (n_)+ (nb_)*(quot__ - nblk__); \ } \ } \ } \ else \ { \ if( ( n__ = (n_) - inb__ ) <= 0 ) \ { \ if( (proc_) == (src_) ) np_ = (n_); \ else np_ = 0; \ } \ else \ { \ if( ( mydist__ = (proc_) - (src_) ) < 0 ) \ mydist__ += (nprocs_); \ nblk__ = n__ /
(nb_) + 1; \ mydist__ -= nblk__ - \ ( quot__ = (nblk__ / (nprocs_)) )*(nprocs_); \ if( mydist__ < 0 ) \ { \ if( (proc_) != (src_) ) \ np_ = (nb_) + (nb_) * quot__; \ else \ np_ = inb__ + (nb_) * quot__; \ } \ else if( mydist__ > 0 ) \ { \ np_ = (nb_) * quot__; \ } \ else \ { \ if( (proc_) != (src_) ) \ np_ = n__ +(nb_)+(nb_)*(quot__ - nblk__); \ else \ np_ = (n_)+ (nb_)*(quot__ - nblk__); \ } \ } \ } \ } \ else \ { \ np_ = (n_); \ } \ } /* * Mnumroc is MnumrocI applied to an interval of n_ global indexes that * starts at global index 0 (i_ = 0). */ #define Mnumroc( np_, n_, inb_, nb_, proc_, src_, nprocs_ ) \ MnumrocI( np_, n_, 0, inb_, nb_, proc_, src_, nprocs_ ) /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ void HPL_indxg2lp STDC_ARGS( ( int *, int *, const int, const int, const int, const int, const int ) ); int HPL_indxg2l STDC_ARGS( ( const int, const int, const int, const int, const int ) ); int HPL_indxg2p STDC_ARGS( ( const int, const int, const int, const int, const int ) ); int HPL_indxl2g STDC_ARGS( ( const int, const int, const int, const int, const int, const int ) ); void HPL_infog2l STDC_ARGS( ( int, int, const int, const int, const int, const int, const int, const int, const int, const int, const int, const int, int *, int *, int *, int * ) ); int HPL_numroc STDC_ARGS( ( const int, const int, const int, const int, const int, const int ) ); int HPL_numrocI STDC_ARGS( ( const int, const int, const int, const int, const int, const int, const int ) ); void HPL_dlaswp00N STDC_ARGS( ( const int, const int, double *, const int, const int * ) ); void HPL_dlaswp10N STDC_ARGS( ( const int, const int, double *, const int, const int * ) ); void HPL_dlaswp01N STDC_ARGS( ( const int, const int, double *, const int, double *, const int, const int *, const int * ) ); void HPL_dlaswp01T STDC_ARGS( ( const int, const int, double *, const int, double *, const int, const int *, const int * ) ); void HPL_dlaswp02N STDC_ARGS( ( const int, const int, const
double *, const int, double *, double *, const int, const int *, const int * ) ); void HPL_dlaswp03N STDC_ARGS( ( const int, const int, double *, const int, const double *, const double *, const int ) ); void HPL_dlaswp03T STDC_ARGS( ( const int, const int, double *, const int, const double *, const double *, const int ) ); void HPL_dlaswp04N STDC_ARGS( ( const int, const int, const int, double *, const int, double *, const int, const double *, const double *, const int, const int *, const int * ) ); void HPL_dlaswp04T STDC_ARGS( ( const int, const int, const int, double *, const int, double *, const int, const double *, const double *, const int, const int *, const int * ) ); void HPL_dlaswp05N STDC_ARGS( ( const int, const int, double *, const int, const double *, const int, const int *, const int * ) ); void HPL_dlaswp05T STDC_ARGS( ( const int, const int, double *, const int, const double *, const int, const int *, const int * ) ); void HPL_dlaswp06N STDC_ARGS( ( const int, const int, double *, const int, double *, const int, const int * ) ); void HPL_dlaswp06T STDC_ARGS( ( const int, const int, double *, const int, double *, const int, const int * ) ); void HPL_pabort STDC_ARGS( ( int, const char *, const char *, ... ) ); void HPL_pwarn STDC_ARGS( ( FILE *, int, const char *, const char *, ... ) ); void HPL_pdlaprnt STDC_ARGS( ( const HPL_T_grid *, const int, const int, const int, double *, const int, const int, const int, const char * ) ); double HPL_pdlamch STDC_ARGS( ( MPI_Comm, const HPL_T_MACH ) ); double HPL_pdlange STDC_ARGS( ( const HPL_T_grid *, const HPL_T_NORM, const int, const int, const int, const double *, const int ) ); #endif /* * End of hpl_pauxil.h */ hpcc-1.4.1/hpl/include/hpl_pfact.h0000644000000000000000000001444211256503657013657 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_PFACT_H #define HPL_PFACT_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" #include "hpl_gesv.h" #include "hpl_pmisc.h" #include "hpl_pauxil.h" #include "hpl_panel.h" /* * --------------------------------------------------------------------- * #typedefs and data structures * --------------------------------------------------------------------- */ /* * Function pointer types. HPL_T_PFA_FUN and HPL_T_RFA_FUN have the same * signature as the HPL_pdpan.. and HPL_pdrpan.. routines declared below; * HPL_T_UPD_FUN takes ( HPL_T_panel *, int *, HPL_T_panel *, const int ). * They are the types of the pffun, rffun and upfun members of HPL_T_palg * (hpl_pgesv.h). */ typedef void (*HPL_T_PFA_FUN) ( HPL_T_panel *, const int, const int, const int, double * ); typedef void (*HPL_T_RFA_FUN) ( HPL_T_panel *, const int, const int, const int, double * ); typedef void (*HPL_T_UPD_FUN) ( HPL_T_panel *, int *, HPL_T_panel *, const int ); /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ void HPL_dlocmax STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_dlocswpN STDC_ARGS( ( HPL_T_panel *, const int, const int, double * ) ); void HPL_dlocswpT STDC_ARGS( ( HPL_T_panel *, const int, const int, double * ) ); void HPL_pdmxswp STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpancrN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpancrT STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpanllN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpanllT STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpanrlN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdpanrlT STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpancrN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpancrT STDC_ARGS( (
HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpanllN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpanllT STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpanrlN STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdrpanrlT STDC_ARGS( ( HPL_T_panel *, const int, const int, const int, double * ) ); void HPL_pdfact STDC_ARGS( ( HPL_T_panel * ) ); #endif /* * End of hpl_pfact.h */ hpcc-1.4.1/hpl/include/hpl_pgesv.h0000644000000000000000000002373311256503657013711 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission.
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_PGESV_H #define HPL_PGESV_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" #include "hpl_auxil.h" #include "hpl_pmisc.h" #include "hpl_grid.h" #include "hpl_comm.h" #include "hpl_pauxil.h" #include "hpl_panel.h" #include "hpl_pfact.h" /* * --------------------------------------------------------------------- * #typedefs and data structures * --------------------------------------------------------------------- */ /* * HPL_T_SWAP enumerates the swapping algorithm choices; it is the type of * the fswap member of HPL_T_palg below. */ typedef enum { HPL_SWAP00 = 451, /* Use HPL_pdlaswp00 */ HPL_SWAP01 = 452, /* Use HPL_pdlaswp01 */ HPL_SW_MIX = 453, /* Use HPL_pdlaswp00_ for small number of */ /* columns, and HPL_pdlaswp01_ otherwise.
*/ HPL_NO_SWP = 499 /* NOTE(review): presumably "no swapping algorithm" -- confirm */ } HPL_T_SWAP; /* * HPL_T_palg groups the algorithmic parameters: broadcast topology, * look-ahead depth, recursion settings, factorization variants and the * matching function pointers, and the swapping options. */ typedef struct HPL_S_palg { HPL_T_TOP btopo; /* row broadcast topology */ int depth; /* look-ahead depth */ int nbdiv; /* recursive division factor */ int nbmin; /* recursion stopping criterion */ HPL_T_FACT pfact; /* panel fact variant */ HPL_T_FACT rfact; /* recursive fact variant */ HPL_T_PFA_FUN pffun; /* panel fact function ptr */ HPL_T_RFA_FUN rffun; /* recursive fact function ptr */ HPL_T_UPD_FUN upfun; /* update function */ HPL_T_SWAP fswap; /* Swapping algorithm */ int fsthr; /* Swapping threshold */ int equil; /* Equilibration */ int align; /* data alignment constant */ } HPL_T_palg; /* * HPL_T_pmat describes the matrix as seen by one process: pointers to the * local piece of A and to the solution vector, the global size, the * blocking factor and the local dimensions. */ typedef struct HPL_S_pmat { #ifdef HPL_CALL_VSIPL vsip_block_d * block; #endif double * A; /* pointer to local piece of A */ double * X; /* pointer to solution vector */ int n; /* global problem size */ int nb; /* blocking factor */ int ld; /* local leading dimension */ int mp; /* local number of rows */ int nq; /* local number of columns */ int info; /* computational flag */ } HPL_T_pmat; /* * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define MSGID_BEGIN_PFACT 1001 /* message id ranges */ #define MSGID_END_PFACT 2000 #define MSGID_BEGIN_FACT 2001 #define MSGID_END_FACT 3000 #define MSGID_BEGIN_PTRSV 3001 #define MSGID_END_PTRSV 4000 #define MSGID_BEGIN_COLL 9001 #define MSGID_END_COLL 10000 /* * --------------------------------------------------------------------- * #define macros definitions * --------------------------------------------------------------------- */ /* * MNxtMgid evaluates to the message id following id_: id_+1, or beg_ when * id_+1 would exceed end_. id_ is evaluated twice. */ #define MNxtMgid( id_, beg_, end_ ) \ (( (id_)+1 > (end_) ?
(beg_) : (id_)+1 )) /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ void HPL_pipid STDC_ARGS( ( HPL_T_panel *, int *, int * ) ); void HPL_plindx0 STDC_ARGS( ( HPL_T_panel *, const int, int *, int *, int *, int * ) ); void HPL_pdlaswp00N STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdlaswp00T STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_perm STDC_ARGS( ( const int, int *, int *, int * ) ); void HPL_logsort STDC_ARGS( ( const int, const int, int *, int *, int * ) ); void HPL_plindx10 STDC_ARGS( ( HPL_T_panel *, const int, const int *, int *, int *, int * ) ); void HPL_plindx1 STDC_ARGS( ( HPL_T_panel *, const int, const int *, int *, int *, int *, int *, int *, int *, int *, int * ) ); void HPL_spreadN STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const enum HPL_SIDE, const int, double *, const int, const int, const int *, const int *, const int * ) ); void HPL_spreadT STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const enum HPL_SIDE, const int, double *, const int, const int, const int *, const int *, const int * ) ); void HPL_equil STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const enum HPL_TRANS, const int, double *, const int, int *, const int *, const int *, int * ) ); void HPL_rollN STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int, double *, const int, const int *, const int *, const int * ) ); void HPL_rollT STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int, double *, const int, const int *, const int *, const int * ) ); void HPL_pdlaswp01N STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdlaswp01T STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdupdateNN STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdupdateNT STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const
int ) ); void HPL_pdupdateTN STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdupdateTT STDC_ARGS( ( HPL_T_panel *, int *, HPL_T_panel *, const int ) ); void HPL_pdgesv0 STDC_ARGS( ( HPL_T_grid *, HPL_T_palg *, HPL_T_pmat * ) ); void HPL_pdgesvK1 STDC_ARGS( ( HPL_T_grid *, HPL_T_palg *, HPL_T_pmat * ) ); void HPL_pdgesvK2 STDC_ARGS( ( HPL_T_grid *, HPL_T_palg *, HPL_T_pmat * ) ); void HPL_pdgesv STDC_ARGS( ( HPL_T_grid *, HPL_T_palg *, HPL_T_pmat * ) ); void HPL_pdtrsv STDC_ARGS( ( HPL_T_grid *, HPL_T_pmat * ) ); #endif /* * End of hpl_pgesv.h */ hpcc-1.4.1/hpl/include/hpl_pmatgen.h0000644000000000000000000000736211256503657014220 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission.
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_PMATGEN_H #define HPL_PMATGEN_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_matgen.h" #include "hpl_pmisc.h" #include "hpl_pauxil.h" /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ void HPL_pdmatgen STDC_ARGS( ( const HPL_T_grid *, const int, const int, const int, double *, const int, const int ) ); #endif /* * End of hpl_pmatgen.h */ hpcc-1.4.1/hpl/include/hpl_pmisc.h0000644000000000000000000000653411256503657013700 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_PMISC_H #define HPL_PMISC_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "mpi.h" #endif /* * End of hpl_pmisc.h */ hpcc-1.4.1/hpl/include/hpl_ptest.h0000644000000000000000000001404511256503657013720 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_PTEST_H #define HPL_PTEST_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" #include "hpl_blas.h" #include "hpl_auxil.h" #include "hpl_gesv.h" #include "hpl_pmisc.h" #include "hpl_pauxil.h" #include "hpl_panel.h" #include "hpl_pgesv.h" #include "hpl_ptimer.h" #include "hpl_pmatgen.h" /* * --------------------------------------------------------------------- * Data Structures * --------------------------------------------------------------------- */ /* * HPL_T_test holds the test bookkeeping: machine epsilon, threshold, * output stream and the failed / passed / skipped / total test counters. */ typedef struct HPL_S_test { double epsil; /* epsilon machine */ double thrsh; /* threshold */ FILE * outfp; /* output stream (only in proc 0) */ int kfail; /* # of tests failed */ int kpass; /* # of tests passed */ int kskip; /* # of tests skipped */ int ktest; /* total number of tests */ } HPL_T_test; /* * HPL_RuntimeData is the record type passed as the last argument of * HPL_pdtest. NOTE(review): the fields are not documented here; their * names suggest performance and timing figures, norms, and the run * parameters -- confirm against HPL_pdtest. */ typedef struct { double Gflops, time, eps, RnormI, Anorm1, AnormI, Xnorm1, XnormI, BnormI; int N, NB, nprow, npcol, depth, nbdiv, nbmin; char cpfact, crfact, ctop, order; } HPL_RuntimeData; /* * --------------------------------------------------------------------- * #define macro constants for testing only * --------------------------------------------------------------------- */ #define HPL_LINE_MAX 256 #define HPL_MAX_PARAM 20 #define HPL_ISEED 100 /* * --------------------------------------------------------------------- * global timers for timing analysis
only * --------------------------------------------------------------------- */ #ifdef HPL_DETAILED_TIMING #define HPL_TIMING_BEG 11 /* timer 0 reserved, used by main */ #define HPL_TIMING_N 6 /* number of timers defined below */ #define HPL_TIMING_RPFACT 11 /* starting from here, contiguous */ #define HPL_TIMING_PFACT 12 #define HPL_TIMING_MXSWP 13 #define HPL_TIMING_UPDATE 14 #define HPL_TIMING_LASWP 15 #define HPL_TIMING_PTRSV 16 #endif /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ void HPL_pdinfo STDC_ARGS( ( HPL_T_test *, int *, int *, int *, int *, HPL_T_ORDER *, int *, int *, int *, int *, HPL_T_FACT *, int *, int *, int *, int *, int *, HPL_T_FACT *, int *, HPL_T_TOP *, int *, int *, HPL_T_SWAP *, int *, int *, int *, int *, int * ) ); void HPL_pdtest STDC_ARGS( ( HPL_T_test *, HPL_T_grid *, HPL_T_palg *, const int, const int, HPL_RuntimeData * ) ); #endif /* * End of hpl_ptest.h */ hpcc-1.4.1/hpl/include/hpl_ptimer.h0000644000000000000000000001145311256503657014061 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #ifndef HPL_PTIMER_H #define HPL_PTIMER_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_pmisc.h" /* * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define HPL_NPTIMER 64 #define HPL_PTIMER_STARTFLAG 5.0 #define HPL_PTIMER_ERROR -1.0 /* * --------------------------------------------------------------------- * type definitions * --------------------------------------------------------------------- */ /* * HPL_T_PTIME is the time-kind selector taken by HPL_ptimer_inquire and * HPL_ptimer_combine (the enumerator names indicate wall-clock vs. CPU * time). */ typedef enum { HPL_WALL_PTIME = 101, HPL_CPU_PTIME = 102 } HPL_T_PTIME; /* * HPL_T_PTIME_OP is the operation selector taken by HPL_ptimer_combine. * NOTE(review): presumably max / min / sum of the timings across the * processes of the communicator -- confirm against HPL_ptimer_combine. */ typedef enum { HPL_AMAX_PTIME = 201, HPL_AMIN_PTIME = 202, HPL_SUM_PTIME = 203 } HPL_T_PTIME_OP; /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ double HPL_ptimer_cputime STDC_ARGS( ( void ) ); double HPL_ptimer_walltime STDC_ARGS( ( void ) ); void HPL_ptimer STDC_ARGS( ( const int ) ); void HPL_ptimer_boot STDC_ARGS( ( void ) ); void HPL_ptimer_combine STDC_ARGS( ( MPI_Comm comm, const HPL_T_PTIME_OP, const HPL_T_PTIME, const int, const int, double * ) ); void HPL_ptimer_disable STDC_ARGS( ( void ) ); void HPL_ptimer_enable STDC_ARGS( ( void ) ); double HPL_ptimer_inquire STDC_ARGS( ( const HPL_T_PTIME, const int ) ); #endif /* * End of hpl_ptimer.h */ hpcc-1.4.1/hpl/include/hpl_test.h0000644000000000000000000001012511256503657013533 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/
#ifndef HPL_TEST_H
#define HPL_TEST_H
/*
 * hpl_test.h -- prototypes for HPL_dinfo and HPL_dtest, plus the
 * headers those prototypes rely on.
 */
/*
 * ---------------------------------------------------------------------
 * Include files
 * ---------------------------------------------------------------------
 */
#include "hpl_misc.h"
#include "hpl_blas.h"
#include "hpl_auxil.h"
#include "hpl_gesv.h"
#include "hpl_matgen.h"
#include "hpl_timer.h"
/*
 * ---------------------------------------------------------------------
 * Function prototypes
 * ---------------------------------------------------------------------
 */
/*
 * HPL_dinfo: every argument is a pointer (FILE **, int *, HPL_T_FACT *,
 * double *).
 * NOTE(review): presumably all of them are outputs filled in by the
 * routine -- confirm against the implementation.
 */
void HPL_dinfo STDC_ARGS( ( FILE * *, int *, int *, int *, HPL_T_FACT *,
                            int *, int *, int *, int *, int *,
                            HPL_T_FACT *, int *, double *, double * ) );
/*
 * HPL_dtest: takes a FILE *, by-value int / HPL_T_FACT / double
 * arguments, and three trailing int pointers.
 * NOTE(review): the trailing int * arguments look like counters updated
 * by the routine -- confirm against the implementation.
 */
void HPL_dtest STDC_ARGS( ( FILE *, const int, const int, const int,
                            HPL_T_FACT, HPL_T_FACT, const int,
                            const double, const double,
                            int *, int *, int * ) );
#endif
/*
 * End of hpl_test.h
 */
hpcc-1.4.1/hpl/include/hpl_timer.h0000644000000000000000000001102511256503657013674 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4.
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #ifndef HPL_TIMER_H #define HPL_TIMER_H /* * --------------------------------------------------------------------- * Include files * --------------------------------------------------------------------- */ #include "hpl_misc.h" /* * --------------------------------------------------------------------- * #define macro constants * --------------------------------------------------------------------- */ #define HPL_NTIMER 64 #define HPL_TIMER_STARTFLAG 5.0 #define HPL_TIMER_ERROR -1.0 /* * --------------------------------------------------------------------- * type definitions * --------------------------------------------------------------------- */ typedef enum { HPL_WALL_TIME = 101, HPL_CPU_TIME = 102 } HPL_T_TIME; /* * --------------------------------------------------------------------- * Function prototypes * --------------------------------------------------------------------- */ double HPL_timer_cputime STDC_ARGS( ( void ) ); double HPL_timer_walltime STDC_ARGS( ( void ) ); 
void HPL_timer STDC_ARGS( ( const int ) ); void HPL_timer_boot STDC_ARGS( ( void ) ); void HPL_timer_enable STDC_ARGS( ( void ) ); void HPL_timer_disable STDC_ARGS( ( void ) ); double HPL_timer_inquire STDC_ARGS( ( const HPL_T_TIME, const int ) ); #endif /* * End of hpl_timer.h */ hpcc-1.4.1/hpl/lib/arch/build/Makefile.hpcc0000644000000000000000000012036111403763471015253 00000000000000# -*- Makefile -*- arch = UNKNOWN include ../../../Make.$(arch) HDEP1 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_test.h OBJS1 = ../../../src/auxil/HPL_dlacpy.o ../../../src/auxil/HPL_dlatcpy.o ../../../src/auxil/HPL_fprintf.o ../../../src/auxil/HPL_warn.o ../../../src/auxil/HPL_abort.o ../../../src/auxil/HPL_dlaprnt.o ../../../src/auxil/HPL_dlange.o HDEP2 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h OBJS2 = ../../../src/auxil/HPL_dlamch.o HDEP3 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h OBJS3 = ../../../src/blas/HPL_dcopy.o ../../../src/blas/HPL_daxpy.o ../../../src/blas/HPL_dscal.o ../../../src/blas/HPL_idamax.o ../../../src/blas/HPL_dgemv.o ../../../src/blas/HPL_dtrsv.o ../../../src/blas/HPL_dger.o ../../../src/blas/HPL_dgemm.o ../../../src/blas/HPL_dtrsm.o HDEP4 = ../../../include/hpl_misc.h ../../../include/hpl_pmisc.h ../../../include/hpl_grid.h ../../../include/hpl_panel.h ../../../include/hpl_pgesv.h OBJS4 = ../../../src/comm/HPL_1ring.o ../../../src/comm/HPL_1rinM.o ../../../src/comm/HPL_2ring.o ../../../src/comm/HPL_2rinM.o ../../../src/comm/HPL_blong.o ../../../src/comm/HPL_blonM.o ../../../src/comm/HPL_packL.o ../../../src/comm/HPL_copyL.o ../../../src/comm/HPL_binit.o ../../../src/comm/HPL_bcast.o ../../../src/comm/HPL_bwait.o ../../../src/comm/HPL_send.o ../../../src/comm/HPL_recv.o ../../../src/comm/HPL_sdrv.o HDEP5 = ../../../include/hpl_misc.h ../../../include/hpl_pmisc.h ../../../include/hpl_grid.h OBJS5 = ../../../src/grid/HPL_grid_init.o 
../../../src/grid/HPL_pnum.o ../../../src/grid/HPL_grid_info.o ../../../src/grid/HPL_grid_exit.o ../../../src/grid/HPL_broadcast.o ../../../src/grid/HPL_reduce.o ../../../src/grid/HPL_all_reduce.o ../../../src/grid/HPL_barrier.o ../../../src/grid/HPL_min.o ../../../src/grid/HPL_max.o ../../../src/grid/HPL_sum.o
# Each HDEPn lists the headers used as prerequisites by the compile rules
# of the objects in the matching OBJSn list.
# -- hpl/src/panel
HDEP6 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_pmisc.h ../../../include/hpl_grid.h ../../../include/hpl_comm.h ../../../include/hpl_pauxil.h ../../../include/hpl_panel.h ../../../include/hpl_pfact.h ../../../include/hpl_pgesv.h
OBJS6 = ../../../src/panel/HPL_pdpanel_new.o ../../../src/panel/HPL_pdpanel_init.o ../../../src/panel/HPL_pdpanel_disp.o ../../../src/panel/HPL_pdpanel_free.o
# -- hpl/src/pauxil
HDEP7 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_pmisc.h ../../../include/hpl_grid.h ../../../include/hpl_pauxil.h
OBJS7 = ../../../src/pauxil/HPL_indxg2l.o ../../../src/pauxil/HPL_indxg2lp.o ../../../src/pauxil/HPL_indxg2p.o ../../../src/pauxil/HPL_indxl2g.o ../../../src/pauxil/HPL_infog2l.o ../../../src/pauxil/HPL_numroc.o ../../../src/pauxil/HPL_numrocI.o ../../../src/pauxil/HPL_dlaswp00N.o ../../../src/pauxil/HPL_dlaswp10N.o ../../../src/pauxil/HPL_dlaswp01N.o ../../../src/pauxil/HPL_dlaswp01T.o ../../../src/pauxil/HPL_dlaswp02N.o ../../../src/pauxil/HPL_dlaswp03N.o ../../../src/pauxil/HPL_dlaswp03T.o ../../../src/pauxil/HPL_dlaswp04N.o ../../../src/pauxil/HPL_dlaswp04T.o ../../../src/pauxil/HPL_dlaswp05N.o ../../../src/pauxil/HPL_dlaswp05T.o ../../../src/pauxil/HPL_dlaswp06N.o ../../../src/pauxil/HPL_dlaswp06T.o ../../../src/pauxil/HPL_pwarn.o ../../../src/pauxil/HPL_pabort.o ../../../src/pauxil/HPL_pdlaprnt.o ../../../src/pauxil/HPL_pdlamch.o ../../../src/pauxil/HPL_pdlange.o
# -- hpl/src/pfact
HDEP8 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_pmisc.h ../../../include/hpl_pauxil.h ../../../include/hpl_pfact.h
OBJS8 = ../../../src/pfact/HPL_dlocmax.o ../../../src/pfact/HPL_dlocswpN.o ../../../src/pfact/HPL_dlocswpT.o ../../../src/pfact/HPL_pdmxswp.o ../../../src/pfact/HPL_pdpancrN.o ../../../src/pfact/HPL_pdpancrT.o ../../../src/pfact/HPL_pdpanllN.o ../../../src/pfact/HPL_pdpanllT.o ../../../src/pfact/HPL_pdpanrlN.o ../../../src/pfact/HPL_pdpanrlT.o ../../../src/pfact/HPL_pdrpanllN.o ../../../src/pfact/HPL_pdrpanllT.o ../../../src/pfact/HPL_pdrpancrN.o ../../../src/pfact/HPL_pdrpancrT.o ../../../src/pfact/HPL_pdrpanrlN.o ../../../src/pfact/HPL_pdrpanrlT.o ../../../src/pfact/HPL_pdfact.o
# -- hpl/src/pgesv
HDEP9 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_pmisc.h ../../../include/hpl_grid.h ../../../include/hpl_comm.h ../../../include/hpl_pauxil.h ../../../include/hpl_panel.h ../../../include/hpl_pfact.h ../../../include/hpl_pgesv.h
OBJS9 = ../../../src/pgesv/HPL_pipid.o ../../../src/pgesv/HPL_plindx0.o ../../../src/pgesv/HPL_pdlaswp00N.o ../../../src/pgesv/HPL_pdlaswp00T.o ../../../src/pgesv/HPL_perm.o ../../../src/pgesv/HPL_logsort.o ../../../src/pgesv/HPL_plindx10.o ../../../src/pgesv/HPL_plindx1.o ../../../src/pgesv/HPL_spreadN.o ../../../src/pgesv/HPL_spreadT.o ../../../src/pgesv/HPL_rollN.o ../../../src/pgesv/HPL_rollT.o ../../../src/pgesv/HPL_equil.o ../../../src/pgesv/HPL_pdlaswp01N.o ../../../src/pgesv/HPL_pdlaswp01T.o ../../../src/pgesv/HPL_pdupdateNN.o ../../../src/pgesv/HPL_pdupdateNT.o ../../../src/pgesv/HPL_pdupdateTN.o ../../../src/pgesv/HPL_pdupdateTT.o ../../../src/pgesv/HPL_pdtrsv.o ../../../src/pgesv/HPL_pdgesv0.o ../../../src/pgesv/HPL_pdgesvK1.o ../../../src/pgesv/HPL_pdgesvK2.o ../../../src/pgesv/HPL_pdgesv.o
# -- hpl/testing/matgen
HDEP10 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_matgen.h
OBJS10 = ../../../testing/matgen/HPL_dmatgen.o ../../../testing/matgen/HPL_ladd.o ../../../testing/matgen/HPL_lmul.o ../../../testing/matgen/HPL_xjumpm.o ../../../testing/matgen/HPL_jumpit.o ../../../testing/matgen/HPL_rand.o ../../../testing/matgen/HPL_setran.o
# -- hpl/testing/timer
HDEP11 = ../../../include/hpl_pmisc.h ../../../include/hpl_timer.h
OBJS11 = ../../../testing/timer/HPL_timer.o ../../../testing/timer/HPL_timer_cputime.o ../../../testing/timer/HPL_timer_walltime.o
# -- hpl/testing/pmatgen
HDEP12 = ../../../include/hpl_pmisc.h ../../../include/hpl_matgen.h ../../../include/hpl_pmisc.h ../../../include/hpl_pauxil.h ../../../include/hpl_pmatgen.h
OBJS12 = ../../../testing/pmatgen/HPL_pdmatgen.o
# -- hpl/testing/ptimer
HDEP13 = ../../../include/hpl_pmisc.h ../../../include/hpl_ptimer.h
OBJS13 = ../../../testing/ptimer/HPL_ptimer.o ../../../testing/ptimer/HPL_ptimer_cputime.o ../../../testing/ptimer/HPL_ptimer_walltime.o
# -- hpl/testing/ptest
HDEP14 = ../../../include/hpl_misc.h ../../../include/hpl_blas.h ../../../include/hpl_auxil.h ../../../include/hpl_gesv.h ../../../include/hpl_pmisc.h ../../../include/hpl_pauxil.h ../../../include/hpl_panel.h ../../../include/hpl_pmatgen.h ../../../include/hpl_pgesv.h ../../../include/hpl_ptimer.h ../../../include/hpl_ptest.h
OBJS14 = ../../../testing/ptest/HPL_pddriver.o ../../../testing/ptest/HPL_pdinfo.o ../../../testing/ptest/HPL_pdtest.o
# -- RandomAccess
HDEP15 = ../../../../include/hpcc.h ../../../include/hpl.h ../../../../RandomAccess/RandomAccess.h ../../../../RandomAccess/buckets.h ../../../../RandomAccess/heap.h ../../../../RandomAccess/pool.h ../../../../RandomAccess/time_bound.h ../../../../RandomAccess/verification.h
OBJS15 = ../../../../RandomAccess/MPIRandomAccess.o ../../../../RandomAccess/buckets.o ../../../../RandomAccess/core_single_cpu_lcg.o ../../../../RandomAccess/core_single_cpu.o ../../../../RandomAccess/heap.o ../../../../RandomAccess/pool.o ../../../../RandomAccess/single_cpu_lcg.o ../../../../RandomAccess/single_cpu.o ../../../../RandomAccess/star_single_cpu_lcg.o ../../../../RandomAccess/star_single_cpu.o ../../../../RandomAccess/time_bound.o ../../../../RandomAccess/utility.o ../../../../RandomAccess/verification_lcg.o ../../../../RandomAccess/verification.o ../../../../RandomAccess/MPIRandomAccess_vanilla.o ../../../../RandomAccess/MPIRandomAccess_opt.o ../../../../RandomAccess/MPIRandomAccessLCG.o ../../../../RandomAccess/MPIRandomAccessLCG_vanilla.o ../../../../RandomAccess/MPIRandomAccessLCG_opt.o
# -- STREAM
HDEP16 = ../../../../include/hpcc.h ../../../include/hpl.h
OBJS16 = ../../../../STREAM/onecpu.o ../../../../STREAM/stream.o
# -- PTRANS
HDEP17 = ../../../../include/hpcc.h ../../../include/hpl.h ../../../../PTRANS/cblacslt.h
OBJS17 = ../../../../PTRANS/pmatgeninc.o ../../../../PTRANS/pdmatgen.o ../../../../PTRANS/pdtransdriver.o ../../../../PTRANS/pdmatcmp.o ../../../../PTRANS/pdtrans.o ../../../../PTRANS/sclapack.o ../../../../PTRANS/cblacslt.o ../../../../PTRANS/mem.o
# -- top-level src (two groups)
HDEP18 = ../../../../include/hpcc.h ../../../../include/hpccver.h ../../../include/hpccmema.h ../../../include/hpl.h
OBJS18 = ../../../../src/bench_lat_bw_1.5.2.o ../../../../src/hpcc.o ../../../../src/io.o ../../../../src/extinit.o ../../../../src/extfinalize.o
HDEP19 = ../../../../include/hpcc.h ../../../include/hpl.h
OBJS19 = ../../../../src/HPL_slamch.o ../../../../src/noopt.o
# -- DGEMM
HDEP20 = ../../../../include/hpcc.h ../../../include/hpl.h
OBJS20 = ../../../../DGEMM/tstdgemm.o ../../../../DGEMM/onecpu.o
# -- FFT
HDEP21 = ../../../../include/hpcc.h ../../../include/hpl.h ../../../../FFT/hpccfft.h ../../../../FFT/wrapfftw.h ../../../../FFT/wrapmpifftw.h
OBJS21 = ../../../../FFT/bcnrand.o ../../../../FFT/fft235.o ../../../../FFT/zfft1d.o ../../../../FFT/pzfft1d.o ../../../../FFT/onecpu.o ../../../../FFT/tstfft.o ../../../../FFT/wrapfftw.o ../../../../FFT/wrapmpifftw.o ../../../../FFT/mpifft.o

# Default goal: link the hpcc executable once the archive is up to date.
# LINKER, LINKFLAGS, HPL_LIBS and HPLlib are not set in this file; they
# are expected to come from the included Make.$(arch).
../../../../hpcc : $(HPLlib)
	$(LINKER) $(LINKFLAGS) -o $@ $(HPL_LIBS)

# Archive every object group into $(HPLlib), then index it with RANLIB.
$(HPLlib): $(OBJS1) $(OBJS2) $(OBJS3) $(OBJS4) $(OBJS5) $(OBJS6) $(OBJS7) $(OBJS8) $(OBJS9) $(OBJS10) $(OBJS11) $(OBJS12) $(OBJS13) $(OBJS14) $(OBJS15) $(OBJS16) $(OBJS17) $(OBJS18) $(OBJS19) $(OBJS20) $(OBJS21)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(OBJS1) $(OBJS2) $(OBJS3) $(OBJS4) $(OBJS5) $(OBJS6) $(OBJS7) $(OBJS8) $(OBJS9) $(OBJS10) $(OBJS11) $(OBJS12) $(OBJS13) $(OBJS14) $(OBJS15) $(OBJS16) $(OBJS17) $(OBJS18) $(OBJS19) $(OBJS20) $(OBJS21)
	$(RANLIB) $(HPLlib)

# "clean" names an action, not a file: declare it phony so that a stray
# file called "clean" in this directory cannot make the target look up to
# date and silently skip the removal.
.PHONY: clean
clean:
	$(RM) ../../../../hpcc $(HPLlib)
	$(RM) $(OBJS1) $(OBJS2) $(OBJS3) $(OBJS4) $(OBJS5) $(OBJS6) $(OBJS7) $(OBJS8) $(OBJS9) $(OBJS10) $(OBJS11) $(OBJS12) $(OBJS13) $(OBJS14) $(OBJS15) $(OBJS16) $(OBJS17) $(OBJS18) $(OBJS19) $(OBJS20) $(OBJS21)

# Compile rules: one explicit rule per object; $@ is the rule's target.
# -- hpl/src/auxil
../../../src/auxil/HPL_dlacpy.o : ../../../src/auxil/HPL_dlacpy.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_dlacpy.c $(CCFLAGS)
../../../src/auxil/HPL_dlatcpy.o : ../../../src/auxil/HPL_dlatcpy.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_dlatcpy.c $(CCFLAGS)
../../../src/auxil/HPL_fprintf.o : ../../../src/auxil/HPL_fprintf.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_fprintf.c $(CCFLAGS)
../../../src/auxil/HPL_warn.o : ../../../src/auxil/HPL_warn.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_warn.c $(CCFLAGS)
../../../src/auxil/HPL_abort.o : ../../../src/auxil/HPL_abort.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_abort.c $(CCFLAGS)
../../../src/auxil/HPL_dlaprnt.o : ../../../src/auxil/HPL_dlaprnt.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_dlaprnt.c $(CCFLAGS)
../../../src/auxil/HPL_dlange.o : ../../../src/auxil/HPL_dlange.c $(HDEP1)
	$(CC) -o $@ -c ../../../src/auxil/HPL_dlange.c $(CCFLAGS)
# HPL_dlamch is the one object here built with $(CCNOOPT) instead of $(CCFLAGS).
../../../src/auxil/HPL_dlamch.o : ../../../src/auxil/HPL_dlamch.c $(HDEP2)
	$(CC) -o $@ -c ../../../src/auxil/HPL_dlamch.c $(CCNOOPT)
# -- hpl/src/blas
../../../src/blas/HPL_dcopy.o : ../../../src/blas/HPL_dcopy.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dcopy.c $(CCFLAGS)
../../../src/blas/HPL_daxpy.o :
../../../src/blas/HPL_daxpy.c $(HDEP3)
	$(CC) -o ../../../src/blas/HPL_daxpy.o -c ../../../src/blas/HPL_daxpy.c $(CCFLAGS)
# One explicit compile rule per object.  In every recipe below $@ stands
# for the rule's own target, i.e. the .o path named before the colon.
# -- hpl/src/blas (prerequisite headers: HDEP3)
../../../src/blas/HPL_dscal.o : ../../../src/blas/HPL_dscal.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dscal.c $(CCFLAGS)
../../../src/blas/HPL_idamax.o : ../../../src/blas/HPL_idamax.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_idamax.c $(CCFLAGS)
../../../src/blas/HPL_dgemv.o : ../../../src/blas/HPL_dgemv.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dgemv.c $(CCFLAGS)
../../../src/blas/HPL_dtrsv.o : ../../../src/blas/HPL_dtrsv.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dtrsv.c $(CCFLAGS)
../../../src/blas/HPL_dger.o : ../../../src/blas/HPL_dger.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dger.c $(CCFLAGS)
../../../src/blas/HPL_dgemm.o : ../../../src/blas/HPL_dgemm.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dgemm.c $(CCFLAGS)
../../../src/blas/HPL_dtrsm.o : ../../../src/blas/HPL_dtrsm.c $(HDEP3)
	$(CC) -o $@ -c ../../../src/blas/HPL_dtrsm.c $(CCFLAGS)
# -- hpl/src/comm (HDEP4)
../../../src/comm/HPL_1ring.o : ../../../src/comm/HPL_1ring.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_1ring.c $(CCFLAGS)
../../../src/comm/HPL_1rinM.o : ../../../src/comm/HPL_1rinM.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_1rinM.c $(CCFLAGS)
../../../src/comm/HPL_2ring.o : ../../../src/comm/HPL_2ring.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_2ring.c $(CCFLAGS)
../../../src/comm/HPL_2rinM.o : ../../../src/comm/HPL_2rinM.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_2rinM.c $(CCFLAGS)
../../../src/comm/HPL_blong.o : ../../../src/comm/HPL_blong.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_blong.c $(CCFLAGS)
../../../src/comm/HPL_blonM.o : ../../../src/comm/HPL_blonM.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_blonM.c $(CCFLAGS)
../../../src/comm/HPL_packL.o : ../../../src/comm/HPL_packL.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_packL.c $(CCFLAGS)
../../../src/comm/HPL_copyL.o : ../../../src/comm/HPL_copyL.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_copyL.c $(CCFLAGS)
../../../src/comm/HPL_binit.o : ../../../src/comm/HPL_binit.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_binit.c $(CCFLAGS)
../../../src/comm/HPL_bcast.o : ../../../src/comm/HPL_bcast.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_bcast.c $(CCFLAGS)
../../../src/comm/HPL_bwait.o : ../../../src/comm/HPL_bwait.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_bwait.c $(CCFLAGS)
../../../src/comm/HPL_send.o : ../../../src/comm/HPL_send.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_send.c $(CCFLAGS)
../../../src/comm/HPL_recv.o : ../../../src/comm/HPL_recv.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_recv.c $(CCFLAGS)
../../../src/comm/HPL_sdrv.o : ../../../src/comm/HPL_sdrv.c $(HDEP4)
	$(CC) -o $@ -c ../../../src/comm/HPL_sdrv.c $(CCFLAGS)
# -- hpl/src/grid (HDEP5)
../../../src/grid/HPL_grid_init.o : ../../../src/grid/HPL_grid_init.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_grid_init.c $(CCFLAGS)
../../../src/grid/HPL_pnum.o : ../../../src/grid/HPL_pnum.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_pnum.c $(CCFLAGS)
../../../src/grid/HPL_grid_info.o : ../../../src/grid/HPL_grid_info.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_grid_info.c $(CCFLAGS)
../../../src/grid/HPL_grid_exit.o : ../../../src/grid/HPL_grid_exit.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_grid_exit.c $(CCFLAGS)
../../../src/grid/HPL_broadcast.o : ../../../src/grid/HPL_broadcast.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_broadcast.c $(CCFLAGS)
../../../src/grid/HPL_reduce.o : ../../../src/grid/HPL_reduce.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_reduce.c $(CCFLAGS)
../../../src/grid/HPL_all_reduce.o : ../../../src/grid/HPL_all_reduce.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_all_reduce.c $(CCFLAGS)
../../../src/grid/HPL_barrier.o : ../../../src/grid/HPL_barrier.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_barrier.c $(CCFLAGS)
../../../src/grid/HPL_min.o : ../../../src/grid/HPL_min.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_min.c $(CCFLAGS)
../../../src/grid/HPL_max.o : ../../../src/grid/HPL_max.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_max.c $(CCFLAGS)
../../../src/grid/HPL_sum.o : ../../../src/grid/HPL_sum.c $(HDEP5)
	$(CC) -o $@ -c ../../../src/grid/HPL_sum.c $(CCFLAGS)
# -- hpl/src/panel (HDEP6)
../../../src/panel/HPL_pdpanel_new.o : ../../../src/panel/HPL_pdpanel_new.c $(HDEP6)
	$(CC) -o $@ -c ../../../src/panel/HPL_pdpanel_new.c $(CCFLAGS)
../../../src/panel/HPL_pdpanel_init.o : ../../../src/panel/HPL_pdpanel_init.c $(HDEP6)
	$(CC) -o $@ -c ../../../src/panel/HPL_pdpanel_init.c $(CCFLAGS)
../../../src/panel/HPL_pdpanel_disp.o : ../../../src/panel/HPL_pdpanel_disp.c $(HDEP6)
	$(CC) -o $@ -c ../../../src/panel/HPL_pdpanel_disp.c $(CCFLAGS)
../../../src/panel/HPL_pdpanel_free.o : ../../../src/panel/HPL_pdpanel_free.c $(HDEP6)
	$(CC) -o $@ -c ../../../src/panel/HPL_pdpanel_free.c $(CCFLAGS)
# -- hpl/src/pauxil (HDEP7)
../../../src/pauxil/HPL_indxg2l.o : ../../../src/pauxil/HPL_indxg2l.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_indxg2l.c $(CCFLAGS)
../../../src/pauxil/HPL_indxg2lp.o : ../../../src/pauxil/HPL_indxg2lp.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_indxg2lp.c $(CCFLAGS)
../../../src/pauxil/HPL_indxg2p.o : ../../../src/pauxil/HPL_indxg2p.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_indxg2p.c $(CCFLAGS)
../../../src/pauxil/HPL_indxl2g.o : ../../../src/pauxil/HPL_indxl2g.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_indxl2g.c $(CCFLAGS)
../../../src/pauxil/HPL_infog2l.o : ../../../src/pauxil/HPL_infog2l.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_infog2l.c $(CCFLAGS)
../../../src/pauxil/HPL_numroc.o : ../../../src/pauxil/HPL_numroc.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_numroc.c $(CCFLAGS)
../../../src/pauxil/HPL_numrocI.o : ../../../src/pauxil/HPL_numrocI.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_numrocI.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp00N.o : ../../../src/pauxil/HPL_dlaswp00N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp00N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp10N.o : ../../../src/pauxil/HPL_dlaswp10N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp10N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp01N.o : ../../../src/pauxil/HPL_dlaswp01N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp01N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp01T.o : ../../../src/pauxil/HPL_dlaswp01T.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp01T.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp02N.o : ../../../src/pauxil/HPL_dlaswp02N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp02N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp03N.o : ../../../src/pauxil/HPL_dlaswp03N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp03N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp03T.o : ../../../src/pauxil/HPL_dlaswp03T.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp03T.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp04N.o : ../../../src/pauxil/HPL_dlaswp04N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp04N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp04T.o : ../../../src/pauxil/HPL_dlaswp04T.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp04T.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp05N.o : ../../../src/pauxil/HPL_dlaswp05N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp05N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp05T.o : ../../../src/pauxil/HPL_dlaswp05T.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp05T.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp06N.o : ../../../src/pauxil/HPL_dlaswp06N.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp06N.c $(CCFLAGS)
../../../src/pauxil/HPL_dlaswp06T.o : ../../../src/pauxil/HPL_dlaswp06T.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_dlaswp06T.c $(CCFLAGS)
../../../src/pauxil/HPL_pwarn.o : ../../../src/pauxil/HPL_pwarn.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_pwarn.c $(CCFLAGS)
../../../src/pauxil/HPL_pabort.o : ../../../src/pauxil/HPL_pabort.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_pabort.c $(CCFLAGS)
../../../src/pauxil/HPL_pdlaprnt.o : ../../../src/pauxil/HPL_pdlaprnt.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_pdlaprnt.c $(CCFLAGS)
../../../src/pauxil/HPL_pdlamch.o : ../../../src/pauxil/HPL_pdlamch.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_pdlamch.c $(CCFLAGS)
../../../src/pauxil/HPL_pdlange.o : ../../../src/pauxil/HPL_pdlange.c $(HDEP7)
	$(CC) -o $@ -c ../../../src/pauxil/HPL_pdlange.c $(CCFLAGS)
# -- hpl/src/pfact (HDEP8)
../../../src/pfact/HPL_dlocmax.o : ../../../src/pfact/HPL_dlocmax.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_dlocmax.c $(CCFLAGS)
../../../src/pfact/HPL_dlocswpN.o : ../../../src/pfact/HPL_dlocswpN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_dlocswpN.c $(CCFLAGS)
../../../src/pfact/HPL_dlocswpT.o : ../../../src/pfact/HPL_dlocswpT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_dlocswpT.c $(CCFLAGS)
../../../src/pfact/HPL_pdmxswp.o : ../../../src/pfact/HPL_pdmxswp.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdmxswp.c $(CCFLAGS)
../../../src/pfact/HPL_pdpancrN.o : ../../../src/pfact/HPL_pdpancrN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpancrN.c $(CCFLAGS)
../../../src/pfact/HPL_pdpancrT.o : ../../../src/pfact/HPL_pdpancrT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpancrT.c $(CCFLAGS)
../../../src/pfact/HPL_pdpanllN.o : ../../../src/pfact/HPL_pdpanllN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpanllN.c $(CCFLAGS)
../../../src/pfact/HPL_pdpanllT.o : ../../../src/pfact/HPL_pdpanllT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpanllT.c $(CCFLAGS)
../../../src/pfact/HPL_pdpanrlN.o : ../../../src/pfact/HPL_pdpanrlN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpanrlN.c $(CCFLAGS)
../../../src/pfact/HPL_pdpanrlT.o : ../../../src/pfact/HPL_pdpanrlT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdpanrlT.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpanllN.o : ../../../src/pfact/HPL_pdrpanllN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpanllN.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpanllT.o : ../../../src/pfact/HPL_pdrpanllT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpanllT.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpancrN.o : ../../../src/pfact/HPL_pdrpancrN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpancrN.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpancrT.o : ../../../src/pfact/HPL_pdrpancrT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpancrT.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpanrlN.o : ../../../src/pfact/HPL_pdrpanrlN.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpanrlN.c $(CCFLAGS)
../../../src/pfact/HPL_pdrpanrlT.o : ../../../src/pfact/HPL_pdrpanrlT.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdrpanrlT.c $(CCFLAGS)
../../../src/pfact/HPL_pdfact.o : ../../../src/pfact/HPL_pdfact.c $(HDEP8)
	$(CC) -o $@ -c ../../../src/pfact/HPL_pdfact.c $(CCFLAGS)
# -- hpl/src/pgesv (HDEP9)
../../../src/pgesv/HPL_pipid.o : ../../../src/pgesv/HPL_pipid.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pipid.c $(CCFLAGS)
../../../src/pgesv/HPL_plindx0.o : ../../../src/pgesv/HPL_plindx0.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_plindx0.c $(CCFLAGS)
../../../src/pgesv/HPL_pdlaswp00N.o : ../../../src/pgesv/HPL_pdlaswp00N.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdlaswp00N.c $(CCFLAGS)
../../../src/pgesv/HPL_pdlaswp00T.o : ../../../src/pgesv/HPL_pdlaswp00T.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdlaswp00T.c $(CCFLAGS)
../../../src/pgesv/HPL_perm.o : ../../../src/pgesv/HPL_perm.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_perm.c $(CCFLAGS)
../../../src/pgesv/HPL_logsort.o : ../../../src/pgesv/HPL_logsort.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_logsort.c $(CCFLAGS)
../../../src/pgesv/HPL_plindx10.o : ../../../src/pgesv/HPL_plindx10.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_plindx10.c $(CCFLAGS)
../../../src/pgesv/HPL_plindx1.o : ../../../src/pgesv/HPL_plindx1.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_plindx1.c $(CCFLAGS)
../../../src/pgesv/HPL_spreadN.o : ../../../src/pgesv/HPL_spreadN.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_spreadN.c $(CCFLAGS)
../../../src/pgesv/HPL_spreadT.o : ../../../src/pgesv/HPL_spreadT.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_spreadT.c $(CCFLAGS)
../../../src/pgesv/HPL_rollN.o : ../../../src/pgesv/HPL_rollN.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_rollN.c $(CCFLAGS)
../../../src/pgesv/HPL_rollT.o : ../../../src/pgesv/HPL_rollT.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_rollT.c $(CCFLAGS)
../../../src/pgesv/HPL_equil.o : ../../../src/pgesv/HPL_equil.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_equil.c $(CCFLAGS)
../../../src/pgesv/HPL_pdlaswp01N.o : ../../../src/pgesv/HPL_pdlaswp01N.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdlaswp01N.c $(CCFLAGS)
../../../src/pgesv/HPL_pdlaswp01T.o : ../../../src/pgesv/HPL_pdlaswp01T.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdlaswp01T.c $(CCFLAGS)
../../../src/pgesv/HPL_pdupdateNN.o : ../../../src/pgesv/HPL_pdupdateNN.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdupdateNN.c $(CCFLAGS)
../../../src/pgesv/HPL_pdupdateNT.o : ../../../src/pgesv/HPL_pdupdateNT.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdupdateNT.c $(CCFLAGS)
../../../src/pgesv/HPL_pdupdateTN.o : ../../../src/pgesv/HPL_pdupdateTN.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdupdateTN.c $(CCFLAGS)
../../../src/pgesv/HPL_pdupdateTT.o : ../../../src/pgesv/HPL_pdupdateTT.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdupdateTT.c $(CCFLAGS)
../../../src/pgesv/HPL_pdtrsv.o : ../../../src/pgesv/HPL_pdtrsv.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdtrsv.c $(CCFLAGS)
../../../src/pgesv/HPL_pdgesv0.o : ../../../src/pgesv/HPL_pdgesv0.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdgesv0.c $(CCFLAGS)
../../../src/pgesv/HPL_pdgesvK1.o : ../../../src/pgesv/HPL_pdgesvK1.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdgesvK1.c $(CCFLAGS)
../../../src/pgesv/HPL_pdgesvK2.o : ../../../src/pgesv/HPL_pdgesvK2.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdgesvK2.c $(CCFLAGS)
../../../src/pgesv/HPL_pdgesv.o : ../../../src/pgesv/HPL_pdgesv.c $(HDEP9)
	$(CC) -o $@ -c ../../../src/pgesv/HPL_pdgesv.c $(CCFLAGS)
# -- hpl/testing/matgen (HDEP10)
../../../testing/matgen/HPL_dmatgen.o : ../../../testing/matgen/HPL_dmatgen.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_dmatgen.c $(CCFLAGS)
../../../testing/matgen/HPL_ladd.o : ../../../testing/matgen/HPL_ladd.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_ladd.c $(CCFLAGS)
../../../testing/matgen/HPL_lmul.o : ../../../testing/matgen/HPL_lmul.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_lmul.c $(CCFLAGS)
../../../testing/matgen/HPL_xjumpm.o : ../../../testing/matgen/HPL_xjumpm.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_xjumpm.c $(CCFLAGS)
../../../testing/matgen/HPL_jumpit.o : ../../../testing/matgen/HPL_jumpit.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_jumpit.c $(CCFLAGS)
../../../testing/matgen/HPL_rand.o : ../../../testing/matgen/HPL_rand.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_rand.c $(CCFLAGS)
../../../testing/matgen/HPL_setran.o : ../../../testing/matgen/HPL_setran.c $(HDEP10)
	$(CC) -o $@ -c ../../../testing/matgen/HPL_setran.c $(CCFLAGS)
# -- hpl/testing/timer (HDEP11)
../../../testing/timer/HPL_timer.o : ../../../testing/timer/HPL_timer.c $(HDEP11)
	$(CC) -o $@ -c ../../../testing/timer/HPL_timer.c $(CCFLAGS)
../../../testing/timer/HPL_timer_cputime.o : ../../../testing/timer/HPL_timer_cputime.c $(HDEP11)
	$(CC) -o $@ -c ../../../testing/timer/HPL_timer_cputime.c $(CCFLAGS)
../../../testing/timer/HPL_timer_walltime.o : ../../../testing/timer/HPL_timer_walltime.c $(HDEP11)
	$(CC) -o $@ -c ../../../testing/timer/HPL_timer_walltime.c $(CCFLAGS)
# -- hpl/testing/pmatgen (HDEP12)
../../../testing/pmatgen/HPL_pdmatgen.o : ../../../testing/pmatgen/HPL_pdmatgen.c $(HDEP12)
	$(CC) -o $@ -c ../../../testing/pmatgen/HPL_pdmatgen.c $(CCFLAGS)
# -- hpl/testing/ptimer (HDEP13)
../../../testing/ptimer/HPL_ptimer.o : ../../../testing/ptimer/HPL_ptimer.c $(HDEP13)
	$(CC) -o $@ -c ../../../testing/ptimer/HPL_ptimer.c $(CCFLAGS)
../../../testing/ptimer/HPL_ptimer_cputime.o : ../../../testing/ptimer/HPL_ptimer_cputime.c $(HDEP13)
	$(CC) -o $@ -c ../../../testing/ptimer/HPL_ptimer_cputime.c $(CCFLAGS)
../../../testing/ptimer/HPL_ptimer_walltime.o : ../../../testing/ptimer/HPL_ptimer_walltime.c $(HDEP13)
	$(CC) -o $@ -c ../../../testing/ptimer/HPL_ptimer_walltime.c $(CCFLAGS)
# -- hpl/testing/ptest (HDEP14)
../../../testing/ptest/HPL_pddriver.o : ../../../testing/ptest/HPL_pddriver.c $(HDEP14)
	$(CC) -o $@ -c ../../../testing/ptest/HPL_pddriver.c $(CCFLAGS)
../../../testing/ptest/HPL_pdinfo.o : ../../../testing/ptest/HPL_pdinfo.c $(HDEP14)
	$(CC) -o $@ -c ../../../testing/ptest/HPL_pdinfo.c $(CCFLAGS)
../../../testing/ptest/HPL_pdtest.o : ../../../testing/ptest/HPL_pdtest.c $(HDEP14)
	$(CC) -o $@ -c ../../../testing/ptest/HPL_pdtest.c $(CCFLAGS)
# -- RandomAccess (HDEP15); from here on the recipes also pass
#    -I../../../../include to the compiler.
../../../../RandomAccess/MPIRandomAccess.o : ../../../../RandomAccess/MPIRandomAccess.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccess.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/buckets.o : ../../../../RandomAccess/buckets.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/buckets.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/core_single_cpu_lcg.o : ../../../../RandomAccess/core_single_cpu_lcg.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/core_single_cpu_lcg.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/core_single_cpu.o : ../../../../RandomAccess/core_single_cpu.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/core_single_cpu.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/heap.o : ../../../../RandomAccess/heap.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/heap.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/pool.o : ../../../../RandomAccess/pool.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/pool.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/single_cpu_lcg.o : ../../../../RandomAccess/single_cpu_lcg.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/single_cpu_lcg.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/single_cpu.o : ../../../../RandomAccess/single_cpu.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/single_cpu.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/star_single_cpu_lcg.o : ../../../../RandomAccess/star_single_cpu_lcg.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/star_single_cpu_lcg.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/star_single_cpu.o : ../../../../RandomAccess/star_single_cpu.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/star_single_cpu.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/time_bound.o : ../../../../RandomAccess/time_bound.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/time_bound.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/utility.o : ../../../../RandomAccess/utility.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/utility.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/verification_lcg.o : ../../../../RandomAccess/verification_lcg.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/verification_lcg.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/verification.o : ../../../../RandomAccess/verification.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/verification.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/MPIRandomAccess_vanilla.o : ../../../../RandomAccess/MPIRandomAccess_vanilla.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccess_vanilla.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/MPIRandomAccess_opt.o : ../../../../RandomAccess/MPIRandomAccess_opt.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccess_opt.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/MPIRandomAccessLCG.o : ../../../../RandomAccess/MPIRandomAccessLCG.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccessLCG.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/MPIRandomAccessLCG_vanilla.o : ../../../../RandomAccess/MPIRandomAccessLCG_vanilla.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccessLCG_vanilla.c -I../../../../include $(CCFLAGS)
../../../../RandomAccess/MPIRandomAccessLCG_opt.o : ../../../../RandomAccess/MPIRandomAccessLCG_opt.c $(HDEP15)
	$(CC) -o $@ -c ../../../../RandomAccess/MPIRandomAccessLCG_opt.c -I../../../../include $(CCFLAGS)
# -- STREAM (HDEP16)
../../../../STREAM/onecpu.o : ../../../../STREAM/onecpu.c $(HDEP16)
	$(CC) -o $@ -c ../../../../STREAM/onecpu.c -I../../../../include $(CCFLAGS)
../../../../STREAM/stream.o : ../../../../STREAM/stream.c $(HDEP16)
	$(CC) -o $@ -c ../../../../STREAM/stream.c -I../../../../include $(CCFLAGS)
# -- PTRANS (HDEP17)
../../../../PTRANS/pmatgeninc.o : ../../../../PTRANS/pmatgeninc.c $(HDEP17)
	$(CC) -o $@ -c ../../../../PTRANS/pmatgeninc.c -I../../../../include $(CCFLAGS)
../../../../PTRANS/pdmatgen.o : ../../../../PTRANS/pdmatgen.c $(HDEP17)
	$(CC) -o $@ -c ../../../../PTRANS/pdmatgen.c -I../../../../include $(CCFLAGS)
../../../../PTRANS/pdtransdriver.o : ../../../../PTRANS/pdtransdriver.c $(HDEP17)
	$(CC) -o $@ -c ../../../../PTRANS/pdtransdriver.c -I../../../../include $(CCFLAGS)
../../../../PTRANS/pdmatcmp.o : ../../../../PTRANS/pdmatcmp.c $(HDEP17)
	$(CC) -o $@ -c ../../../../PTRANS/pdmatcmp.c -I../../../../include $(CCFLAGS)
../../../../PTRANS/pdtrans.o : ../../../../PTRANS/pdtrans.c $(HDEP17)
	$(CC) -o ../../../../PTRANS/pdtrans.o -c ../../../../PTRANS/pdtrans.c
-I../../../../include $(CCFLAGS) ../../../../PTRANS/sclapack.o : ../../../../PTRANS/sclapack.c $(HDEP17) $(CC) -o ../../../../PTRANS/sclapack.o -c ../../../../PTRANS/sclapack.c -I../../../../include $(CCFLAGS) ../../../../PTRANS/cblacslt.o : ../../../../PTRANS/cblacslt.c $(HDEP17) $(CC) -o ../../../../PTRANS/cblacslt.o -c ../../../../PTRANS/cblacslt.c -I../../../../include $(CCFLAGS) ../../../../PTRANS/mem.o : ../../../../PTRANS/mem.c $(HDEP17) $(CC) -o ../../../../PTRANS/mem.o -c ../../../../PTRANS/mem.c -I../../../../include $(CCFLAGS) ../../../../src/bench_lat_bw_1.5.2.o : ../../../../src/bench_lat_bw_1.5.2.c $(HDEP18) $(CC) -o ../../../../src/bench_lat_bw_1.5.2.o -c ../../../../src/bench_lat_bw_1.5.2.c -I../../../../include $(CCFLAGS) ../../../../src/hpcc.o : ../../../../src/hpcc.c $(HDEP18) $(CC) -o ../../../../src/hpcc.o -c ../../../../src/hpcc.c -I../../../../include $(CCFLAGS) ../../../../src/io.o : ../../../../src/io.c $(HDEP18) $(CC) -o ../../../../src/io.o -c ../../../../src/io.c -I../../../../include $(CCFLAGS) ../../../../src/extinit.o : ../../../../src/extinit.c $(HDEP18) $(CC) -o ../../../../src/extinit.o -c ../../../../src/extinit.c -I../../../../include $(CCFLAGS) ../../../../src/extfinalize.o : ../../../../src/extfinalize.c $(HDEP18) $(CC) -o ../../../../src/extfinalize.o -c ../../../../src/extfinalize.c -I../../../../include $(CCFLAGS) ../../../../src/HPL_slamch.o : ../../../../src/HPL_slamch.c $(HDEP19) $(CC) -o ../../../../src/HPL_slamch.o -c ../../../../src/HPL_slamch.c -I../../../../include $(CCNOOPT) ../../../../src/noopt.o : ../../../../src/noopt.c $(HDEP19) $(CC) -o ../../../../src/noopt.o -c ../../../../src/noopt.c -I../../../../include $(CCNOOPT) ../../../../DGEMM/tstdgemm.o : ../../../../DGEMM/tstdgemm.c $(HDEP20) $(CC) -o ../../../../DGEMM/tstdgemm.o -c ../../../../DGEMM/tstdgemm.c -I../../../../include $(CCFLAGS) ../../../../DGEMM/onecpu.o : ../../../../DGEMM/onecpu.c $(HDEP20) $(CC) -o ../../../../DGEMM/onecpu.o -c 
../../../../DGEMM/onecpu.c -I../../../../include $(CCFLAGS) ../../../../FFT/bcnrand.o : ../../../../FFT/bcnrand.c $(HDEP21) $(CC) -o ../../../../FFT/bcnrand.o -c ../../../../FFT/bcnrand.c -I../../../../include $(CCFLAGS) ../../../../FFT/fft235.o : ../../../../FFT/fft235.c $(HDEP21) $(CC) -o ../../../../FFT/fft235.o -c ../../../../FFT/fft235.c -I../../../../include $(CCFLAGS) ../../../../FFT/zfft1d.o : ../../../../FFT/zfft1d.c $(HDEP21) $(CC) -o ../../../../FFT/zfft1d.o -c ../../../../FFT/zfft1d.c -I../../../../include $(CCFLAGS) ../../../../FFT/pzfft1d.o : ../../../../FFT/pzfft1d.c $(HDEP21) $(CC) -o ../../../../FFT/pzfft1d.o -c ../../../../FFT/pzfft1d.c -I../../../../include $(CCFLAGS) ../../../../FFT/onecpu.o : ../../../../FFT/onecpu.c $(HDEP21) $(CC) -o ../../../../FFT/onecpu.o -c ../../../../FFT/onecpu.c -I../../../../include $(CCFLAGS) ../../../../FFT/tstfft.o : ../../../../FFT/tstfft.c $(HDEP21) $(CC) -o ../../../../FFT/tstfft.o -c ../../../../FFT/tstfft.c -I../../../../include $(CCFLAGS) ../../../../FFT/wrapfftw.o : ../../../../FFT/wrapfftw.c $(HDEP21) $(CC) -o ../../../../FFT/wrapfftw.o -c ../../../../FFT/wrapfftw.c -I../../../../include $(CCFLAGS) ../../../../FFT/wrapmpifftw.o : ../../../../FFT/wrapmpifftw.c $(HDEP21) $(CC) -o ../../../../FFT/wrapmpifftw.o -c ../../../../FFT/wrapmpifftw.c -I../../../../include $(CCFLAGS) ../../../../FFT/mpifft.o : ../../../../FFT/mpifft.c $(HDEP21) $(CC) -o ../../../../FFT/mpifft.o -c ../../../../FFT/mpifft.c -I../../../../include $(CCFLAGS) hpcc-1.4.1/hpl/makes/Make.auxil0000644000000000000000000001171411256503657013143 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Compiles the objects listed in HPL_auxobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, CCNOOPT, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH
# and RM are not set in this file; presumably Make.inc provides them --
# confirm against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h
#
## Object files ########################################################
#
# HPL_au0obj: objects compiled with $(CCFLAGS).
HPL_au0obj = \
   HPL_dlacpy.o HPL_dlatcpy.o HPL_fprintf.o \
   HPL_warn.o HPL_abort.o HPL_dlaprnt.o \
   HPL_dlange.o
# HPL_au1obj: the one object compiled with $(CCNOOPT) (see its rule).
HPL_au1obj = \
   HPL_dlamch.o
HPL_auxobj = \
   $(HPL_au0obj) $(HPL_au1obj)
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_auxobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_auxobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_dlacpy.o : ../HPL_dlacpy.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlacpy.c
HPL_dlatcpy.o : ../HPL_dlatcpy.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlatcpy.c
HPL_fprintf.o : ../HPL_fprintf.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_fprintf.c
HPL_warn.o : ../HPL_warn.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_warn.c
HPL_abort.o : ../HPL_abort.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_abort.c
HPL_dlaprnt.o : ../HPL_dlaprnt.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaprnt.c
HPL_dlange.o : ../HPL_dlange.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlange.c
# Built with $(CCNOOPT), not $(CCFLAGS).  NOTE(review): presumably so the
# optimizer cannot disturb the machine-parameter probing -- confirm.
HPL_dlamch.o : ../HPL_dlamch.c $(INCdep)
	$(CC) -o $@ -c $(CCNOOPT) ../HPL_dlamch.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.blas0000644000000000000000000001175511256503657012747 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Compiles the objects listed in HPL_blaobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h
#
## Object files ########################################################
#
HPL_blaobj = \
   HPL_dcopy.o HPL_daxpy.o HPL_dscal.o \
   HPL_idamax.o HPL_dgemv.o HPL_dtrsv.o \
   HPL_dger.o HPL_dgemm.o HPL_dtrsm.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_blaobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_blaobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_dcopy.o : ../HPL_dcopy.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dcopy.c
HPL_daxpy.o : ../HPL_daxpy.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_daxpy.c
HPL_dscal.o : ../HPL_dscal.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dscal.c
HPL_idamax.o : ../HPL_idamax.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_idamax.c
HPL_dgemv.o : ../HPL_dgemv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemv.c
HPL_dtrsv.o : ../HPL_dtrsv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsv.c
HPL_dger.o : ../HPL_dger.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dger.c
HPL_dgemm.o : ../HPL_dgemm.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dgemm.c
HPL_dtrsm.o : ../HPL_dtrsm.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dtrsm.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.comm0000644000000000000000000001327111256503657012754 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Compiles the objects listed in HPL_comobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h \
   $(INCdir)/hpl_panel.h $(INCdir)/hpl_pgesv.h
#
## Object files ########################################################
#
HPL_comobj = \
   HPL_1ring.o HPL_1rinM.o HPL_2ring.o \
   HPL_2rinM.o HPL_blong.o HPL_blonM.o \
   HPL_packL.o HPL_copyL.o HPL_binit.o \
   HPL_bcast.o HPL_bwait.o HPL_send.o \
   HPL_recv.o HPL_sdrv.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_comobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_comobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_1ring.o : ../HPL_1ring.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_1ring.c
HPL_1rinM.o : ../HPL_1rinM.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_1rinM.c
HPL_2ring.o : ../HPL_2ring.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_2ring.c
HPL_2rinM.o : ../HPL_2rinM.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_2rinM.c
HPL_blong.o : ../HPL_blong.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_blong.c
HPL_blonM.o : ../HPL_blonM.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_blonM.c
HPL_packL.o : ../HPL_packL.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_packL.c
HPL_copyL.o : ../HPL_copyL.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_copyL.c
HPL_binit.o : ../HPL_binit.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_binit.c
HPL_bcast.o : ../HPL_bcast.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_bcast.c
HPL_bwait.o : ../HPL_bwait.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_bwait.c
HPL_send.o : ../HPL_send.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_send.c
HPL_recv.o : ../HPL_recv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_recv.c
HPL_sdrv.o : ../HPL_sdrv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_sdrv.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.gesv0000644000000000000000000001023211256503657012757 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
# Compiles the objects listed in HPL_gesobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_gesv.h
#
## Object files ########################################################
#
HPL_gesobj = \
   HPL_dgesv.o HPL_ipid.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_gesobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_gesobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_dgesv.o : ../HPL_dgesv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dgesv.c
HPL_ipid.o : ../HPL_ipid.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ipid.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.grid0000644000000000000000000001242511256503657012746 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Compiles the objects listed in HPL_griobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h
#
## Object files ########################################################
#
HPL_griobj = \
   HPL_grid_init.o HPL_pnum.o HPL_grid_info.o \
   HPL_grid_exit.o HPL_broadcast.o HPL_reduce.o \
   HPL_all_reduce.o HPL_barrier.o HPL_min.o \
   HPL_max.o HPL_sum.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_griobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_griobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_grid_init.o : ../HPL_grid_init.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_init.c
HPL_pnum.o : ../HPL_pnum.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pnum.c
HPL_grid_info.o : ../HPL_grid_info.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_info.c
HPL_grid_exit.o : ../HPL_grid_exit.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_grid_exit.c
HPL_broadcast.o : ../HPL_broadcast.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_broadcast.c
HPL_reduce.o : ../HPL_reduce.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_reduce.c
HPL_all_reduce.o : ../HPL_all_reduce.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_all_reduce.c
HPL_barrier.o : ../HPL_barrier.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_barrier.c
HPL_min.o : ../HPL_min.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_min.c
HPL_max.o : ../HPL_max.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_max.c
HPL_sum.o : ../HPL_sum.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_sum.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.matgen0000644000000000000000000001144611256503657013276 00000000000000# # -- High
Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
# Compiles the objects listed in HPL_matobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_matgen.h
#
## Object files ########################################################
#
HPL_matobj = \
   HPL_dmatgen.o HPL_ladd.o HPL_lmul.o \
   HPL_xjumpm.o HPL_jumpit.o HPL_rand.o \
   HPL_setran.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_matobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_matobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_dmatgen.o : ../HPL_dmatgen.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dmatgen.c
HPL_ladd.o : ../HPL_ladd.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ladd.c
HPL_lmul.o : ../HPL_lmul.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_lmul.c
HPL_xjumpm.o : ../HPL_xjumpm.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_xjumpm.c
HPL_jumpit.o : ../HPL_jumpit.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_jumpit.c
HPL_rand.o : ../HPL_rand.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_rand.c
HPL_setran.o : ../HPL_setran.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_setran.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
#
###################################################################### hpcc-1.4.1/hpl/makes/Make.panel0000644000000000000000000001112111256503657013110 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
# Compiles the objects listed in HPL_panobj from the sources one
# directory up and adds them to the archive $(HPLlib).
# CC, CCFLAGS, INCdir, HPLlib, ARCHIVER, ARFLAGS, RANLIB, TOUCH and RM
# are not set in this file; presumably Make.inc provides them -- confirm
# against the platform's Make.inc.
#
include Make.inc
#
# ######################################################################
#
# Headers that every object in this file is rebuilt against.
INCdep = \
   $(INCdir)/hpl_misc.h $(INCdir)/hpl_blas.h $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_pmisc.h $(INCdir)/hpl_grid.h $(INCdir)/hpl_comm.h \
   $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_panel.h $(INCdir)/hpl_pfact.h \
   $(INCdir)/hpl_pgesv.h
#
## Object files ########################################################
#
HPL_panobj = \
   HPL_pdpanel_new.o HPL_pdpanel_init.o HPL_pdpanel_disp.o \
   HPL_pdpanel_free.o
#
## Targets #############################################################
#
all : lib
#
# all, lib and clean name actions, not files.  Declaring them phony keeps
# a stray file called "all", "lib" or "clean" in the build directory from
# making the target look up to date.  Placed after "all" so the default
# goal is unchanged on makes that do not treat .PHONY specially.
.PHONY : all lib clean
#
lib : lib.grd
#
# lib.grd is a stamp file: it is touched after the objects have been
# archived, so the archive step reruns only when an object is newer
# than the stamp.
lib.grd : $(HPL_panobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_panobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
HPL_pdpanel_new.o : ../HPL_pdpanel_new.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_new.c
HPL_pdpanel_init.o : ../HPL_pdpanel_init.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_init.c
HPL_pdpanel_disp.o : ../HPL_pdpanel_disp.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_disp.c
HPL_pdpanel_free.o : ../HPL_pdpanel_free.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanel_free.c
#
# ######################################################################
#
# Removes every object and stamp file in the current directory.
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.pauxil0000644000000000000000000001625111256503657013324 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_blas.h \
   $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_grid.h \
   $(INCdir)/hpl_pauxil.h
#
## Object files ########################################################
#
HPL_pauobj = \
   HPL_indxg2l.o HPL_indxg2lp.o HPL_indxg2p.o HPL_indxl2g.o \
   HPL_infog2l.o HPL_numroc.o HPL_numrocI.o \
   HPL_dlaswp00N.o HPL_dlaswp10N.o \
   HPL_dlaswp01N.o HPL_dlaswp01T.o HPL_dlaswp02N.o \
   HPL_dlaswp03N.o HPL_dlaswp03T.o \
   HPL_dlaswp04N.o HPL_dlaswp04T.o \
   HPL_dlaswp05N.o HPL_dlaswp05T.o \
   HPL_dlaswp06N.o HPL_dlaswp06T.o \
   HPL_pwarn.o HPL_pabort.o HPL_pdlaprnt.o HPL_pdlamch.o \
   HPL_pdlange.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the objects to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_pauobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pauobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_indxg2l.o : ../HPL_indxg2l.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2l.c
HPL_indxg2lp.o : ../HPL_indxg2lp.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2lp.c
HPL_indxg2p.o : ../HPL_indxg2p.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxg2p.c
HPL_indxl2g.o : ../HPL_indxl2g.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_indxl2g.c
HPL_infog2l.o : ../HPL_infog2l.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_infog2l.c
HPL_numroc.o : ../HPL_numroc.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_numroc.c
HPL_numrocI.o : ../HPL_numrocI.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_numrocI.c
HPL_dlaswp00N.o : ../HPL_dlaswp00N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp00N.c
HPL_dlaswp10N.o : ../HPL_dlaswp10N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp10N.c
HPL_dlaswp01N.o : ../HPL_dlaswp01N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01N.c
HPL_dlaswp01T.o : ../HPL_dlaswp01T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp01T.c
HPL_dlaswp02N.o : ../HPL_dlaswp02N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp02N.c
HPL_dlaswp03N.o : ../HPL_dlaswp03N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03N.c
HPL_dlaswp03T.o : ../HPL_dlaswp03T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp03T.c
HPL_dlaswp04N.o : ../HPL_dlaswp04N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04N.c
HPL_dlaswp04T.o : ../HPL_dlaswp04T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp04T.c
HPL_dlaswp05N.o : ../HPL_dlaswp05N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05N.c
HPL_dlaswp05T.o : ../HPL_dlaswp05T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp05T.c
HPL_dlaswp06N.o : ../HPL_dlaswp06N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06N.c
HPL_dlaswp06T.o : ../HPL_dlaswp06T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlaswp06T.c
HPL_pwarn.o : ../HPL_pwarn.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pwarn.c
HPL_pabort.o : ../HPL_pabort.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pabort.c
HPL_pdlaprnt.o : ../HPL_pdlaprnt.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaprnt.c
HPL_pdlamch.o : ../HPL_pdlamch.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlamch.c
HPL_pdlange.o : ../HPL_pdlange.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlange.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
#
###################################################################### hpcc-1.4.1/hpl/makes/Make.pfact0000644000000000000000000001424411256503657013117 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY
# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_blas.h \
   $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_pauxil.h \
   $(INCdir)/hpl_pfact.h
#
## Object files ########################################################
#
HPL_pfaobj = \
   HPL_dlocmax.o HPL_dlocswpN.o HPL_dlocswpT.o HPL_pdmxswp.o \
   HPL_pdpancrN.o HPL_pdpancrT.o \
   HPL_pdpanllN.o HPL_pdpanllT.o \
   HPL_pdpanrlN.o HPL_pdpanrlT.o \
   HPL_pdrpanllN.o HPL_pdrpanllT.o \
   HPL_pdrpancrN.o HPL_pdrpancrT.o \
   HPL_pdrpanrlN.o HPL_pdrpanrlT.o \
   HPL_pdfact.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the objects to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_pfaobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pfaobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_dlocmax.o : ../HPL_dlocmax.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocmax.c
HPL_dlocswpN.o : ../HPL_dlocswpN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpN.c
HPL_dlocswpT.o : ../HPL_dlocswpT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dlocswpT.c
HPL_pdmxswp.o : ../HPL_pdmxswp.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmxswp.c
HPL_pdpancrN.o : ../HPL_pdpancrN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrN.c
HPL_pdpancrT.o : ../HPL_pdpancrT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpancrT.c
HPL_pdpanllN.o : ../HPL_pdpanllN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllN.c
HPL_pdpanllT.o : ../HPL_pdpanllT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanllT.c
HPL_pdpanrlN.o : ../HPL_pdpanrlN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlN.c
HPL_pdpanrlT.o : ../HPL_pdpanrlT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdpanrlT.c
HPL_pdrpanllN.o : ../HPL_pdrpanllN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllN.c
HPL_pdrpanllT.o : ../HPL_pdrpanllT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanllT.c
HPL_pdrpancrN.o : ../HPL_pdrpancrN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrN.c
HPL_pdrpancrT.o : ../HPL_pdrpancrT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpancrT.c
HPL_pdrpanrlN.o : ../HPL_pdrpanrlN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlN.c
HPL_pdrpanrlT.o : ../HPL_pdrpanrlT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdrpanrlT.c
HPL_pdfact.o : ../HPL_pdfact.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdfact.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.pgesv0000644000000000000000000001617311256503657013149 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_blas.h \
   $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_grid.h \
   $(INCdir)/hpl_comm.h \
   $(INCdir)/hpl_pauxil.h \
   $(INCdir)/hpl_panel.h \
   $(INCdir)/hpl_pfact.h \
   $(INCdir)/hpl_pgesv.h
#
## Object files ########################################################
#
HPL_pgeobj = \
   HPL_pipid.o HPL_plindx0.o \
   HPL_pdlaswp00N.o HPL_pdlaswp00T.o \
   HPL_perm.o HPL_logsort.o HPL_plindx10.o HPL_plindx1.o \
   HPL_spreadN.o HPL_spreadT.o HPL_rollN.o HPL_rollT.o \
   HPL_equil.o \
   HPL_pdlaswp01N.o HPL_pdlaswp01T.o \
   HPL_pdupdateNN.o HPL_pdupdateNT.o \
   HPL_pdupdateTN.o HPL_pdupdateTT.o \
   HPL_pdtrsv.o HPL_pdgesv0.o HPL_pdgesvK1.o HPL_pdgesvK2.o \
   HPL_pdgesv.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the objects to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_pgeobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pgeobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_pipid.o : ../HPL_pipid.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pipid.c
HPL_plindx0.o : ../HPL_plindx0.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx0.c
HPL_pdlaswp00N.o : ../HPL_pdlaswp00N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00N.c
HPL_pdlaswp00T.o : ../HPL_pdlaswp00T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp00T.c
HPL_perm.o : ../HPL_perm.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_perm.c
HPL_logsort.o : ../HPL_logsort.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_logsort.c
HPL_plindx10.o : ../HPL_plindx10.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx10.c
HPL_plindx1.o : ../HPL_plindx1.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_plindx1.c
HPL_spreadN.o : ../HPL_spreadN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadN.c
HPL_spreadT.o : ../HPL_spreadT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_spreadT.c
HPL_rollN.o : ../HPL_rollN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_rollN.c
HPL_rollT.o : ../HPL_rollT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_rollT.c
HPL_equil.o : ../HPL_equil.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_equil.c
HPL_pdlaswp01N.o : ../HPL_pdlaswp01N.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01N.c
HPL_pdlaswp01T.o : ../HPL_pdlaswp01T.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdlaswp01T.c
HPL_pdupdateNN.o : ../HPL_pdupdateNN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNN.c
HPL_pdupdateNT.o : ../HPL_pdupdateNT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateNT.c
HPL_pdupdateTN.o : ../HPL_pdupdateTN.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTN.c
HPL_pdupdateTT.o : ../HPL_pdupdateTT.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdupdateTT.c
HPL_pdtrsv.o : ../HPL_pdtrsv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtrsv.c
HPL_pdgesv0.o : ../HPL_pdgesv0.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv0.c
HPL_pdgesvK1.o : ../HPL_pdgesvK1.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK1.c
HPL_pdgesvK2.o : ../HPL_pdgesvK2.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesvK2.c
HPL_pdgesv.o : ../HPL_pdgesv.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdgesv.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.pmatgen0000644000000000000000000001007511256503657013453 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1.
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that the single object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_matgen.h \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_pauxil.h \
   $(INCdir)/hpl_pmatgen.h
#
## Object files ########################################################
#
HPL_pmaobj = HPL_pdmatgen.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the object to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_pmaobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pmaobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# Explicit compile rule; the source lives one directory up.
#
HPL_pdmatgen.o : ../HPL_pdmatgen.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdmatgen.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.ptest0000644000000000000000000001143111256503657013154 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# 3.
All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_blas.h \
   $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_gesv.h \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_pauxil.h \
   $(INCdir)/hpl_panel.h \
   $(INCdir)/hpl_pgesv.h \
   $(INCdir)/hpl_pmatgen.h \
   $(INCdir)/hpl_ptimer.h \
   $(INCdir)/hpl_ptest.h
#
## Executable names ####################################################
#
xhpl = $(BINdir)/xhpl
#
## Object files ########################################################
#
HPL_pteobj = HPL_pddriver.o HPL_pdinfo.o HPL_pdtest.o
#
## Targets #############################################################
#
# The default target only builds lib.grd; its former "dexe" prerequisite
# is commented out, so the xhpl executable is linked only when dexe is
# requested explicitly.
#
all : lib.grd # dexe
#
dexe : dexe.grd
#
# Copy the input data file next to the executable.
#
$(BINdir)/HPL.dat : ../HPL.dat
	( $(CP) ../HPL.dat $(BINdir) )
#
# Link xhpl from the test objects, make sure HPL.dat is in place, then
# update the dexe.grd file with $(TOUCH).
#
dexe.grd: $(HPL_pteobj) $(HPLlib)
	$(LINKER) $(LINKFLAGS) -o $(xhpl) $(HPL_pteobj) $(HPL_LIBS)
	$(MAKE) $(BINdir)/HPL.dat
	$(TOUCH) dexe.grd
#
# Add the test objects to $(HPLlib), run $(RANLIB) on it and update the
# lib.grd file with $(TOUCH).
#
lib.grd : $(HPL_pteobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_pteobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_pddriver.o : ../HPL_pddriver.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pddriver.c
HPL_pdinfo.o : ../HPL_pdinfo.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdinfo.c
HPL_pdtest.o : ../HPL_pdtest.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_pdtest.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.ptimer0000644000000000000000000001041111256503657013312 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_ptimer.h
#
## Object files ########################################################
#
HPL_ptiobj = HPL_ptimer.o HPL_ptimer_cputime.o HPL_ptimer_walltime.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the objects to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_ptiobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_ptiobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_ptimer.o : ../HPL_ptimer.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer.c
HPL_ptimer_cputime.o : ../HPL_ptimer_cputime.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_cputime.c
HPL_ptimer_walltime.o : ../HPL_ptimer_walltime.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ptimer_walltime.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.test0000644000000000000000000001111611256503657012774 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2.
Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_misc.h \
   $(INCdir)/hpl_blas.h \
   $(INCdir)/hpl_auxil.h \
   $(INCdir)/hpl_gesv.h \
   $(INCdir)/hpl_matgen.h \
   $(INCdir)/hpl_timer.h \
   $(INCdir)/hpl_test.h
#
## Executable names ####################################################
#
xlinpack = $(BINdir)/xlinpack
#
## Object files ########################################################
#
HPL_tesobj = HPL_ddriver.o HPL_dinfo.o HPL_dtest.o
#
## Targets #############################################################
#
all : dexe
#
dexe : dexe.grd
#
# Copy the input data file next to the executable.
#
$(BINdir)/LINPACK.dat : ../LINPACK.dat
	( $(CP) ../LINPACK.dat $(BINdir) )
#
# Link xlinpack from the test objects, make sure LINPACK.dat is in
# place, then update the dexe.grd file with $(TOUCH).
#
# The libraries are taken from $(HPL_LIBS), the same variable the
# dexe.grd rule of Make.ptest links with.  The bare word HPL_make_libs
# that used to stand here was handed to the linker as a file name.
#
dexe.grd: $(HPL_tesobj) $(HPLlib)
	$(LINKER) $(LINKFLAGS) -o $(xlinpack) $(HPL_tesobj) $(HPL_LIBS)
	$(MAKE) $(BINdir)/LINPACK.dat
	$(TOUCH) dexe.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_ddriver.o : ../HPL_ddriver.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_ddriver.c
HPL_dinfo.o : ../HPL_dinfo.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dinfo.c
HPL_dtest.o : ../HPL_dtest.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_dtest.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.timer0000644000000000000000000001040311256503657013133 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1.
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
include Make.inc
#
# ######################################################################
#
# Headers that every object below depends on.
#
INCdep = \
   $(INCdir)/hpl_pmisc.h \
   $(INCdir)/hpl_timer.h
#
## Object files ########################################################
#
HPL_timobj = HPL_timer.o HPL_timer_cputime.o HPL_timer_walltime.o
#
## Targets #############################################################
#
# all -> lib -> lib.grd.  lib.grd adds the objects to $(HPLlib), runs
# $(RANLIB) on it and then updates the lib.grd file with $(TOUCH).
#
all : lib
#
lib : lib.grd
#
lib.grd : $(HPL_timobj)
	$(ARCHIVER) $(ARFLAGS) $(HPLlib) $(HPL_timobj)
	$(RANLIB) $(HPLlib)
	$(TOUCH) lib.grd
#
# ######################################################################
#
# One explicit compile rule per object; sources live one directory up.
#
HPL_timer.o : ../HPL_timer.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_timer.c
HPL_timer_cputime.o : ../HPL_timer_cputime.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_cputime.c
HPL_timer_walltime.o : ../HPL_timer_walltime.c $(INCdep)
	$(CC) -o $@ -c $(CCFLAGS) ../HPL_timer_walltime.c
#
# ######################################################################
#
# Remove the objects and the *.grd files of this directory.
#
clean :
	$(RM) *.o *.grd
#
# ######################################################################
hpcc-1.4.1/hpl/makes/Make.units0000644000000000000000000001322311256503657013160 00000000000000#
# -- High Performance Computing Linpack Benchmark (HPL)
# HPL - 2.0 - September 10, 2008
# Antoine P. Petitet
# University of Tennessee, Knoxville
# Innovative Computing Laboratory
# (C) Copyright 2000-2008 All Rights Reserved
#
# -- Copyright notice and Licensing terms:
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions, and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # include Make.inc # # ###################################################################### # INCdep = \ @rout Make.units $(INCdir)/hpl_misc.h $(INCdir)/hpl_auxil.h $(INCdir)/hpl_pmisc.h \ $(INCdir)/hpl_pauxil.h $(INCdir)/hpl_units.h # ## Executable names #################################################### # xunits = $(BINdir)/xunits # ## Object files ######################################################## # HPL_uniobj = \ HPL_unit_driver.o HPL_unit_info.o HPL_unit_indxg2l.o \ HPL_chek_indxg2l.o HPL_unit_indxg2p.o HPL_chek_indxg2p.o \ HPL_unit_indxl2g.o HPL_chek_indxl2g.o HPL_unit_numroc.o \ HPL_unit_numrocI.o HPL_chek_numrocI.o # ## Targets ############################################################# # all : dexe # dexe : dexe.grd # $(BINdir)/UNITS.dat : ../UNITS.dat ( $(CP) ../UNITS.dat $(BINdir) ) # dexe.grd : $(HPL_uniobj) $(HPLlib) $(LINKER) $(LINKFLAGS) -o $(xunits) $(HPL_uniobj) @(hpllibs) $(MAKE) $(BINdir)/UNITS.dat $(TOUCH) dexe.grd # # ###################################################################### # HPL_unit_driver.o : ../HPL_unit_driver.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_driver.c HPL_unit_info.o : ../HPL_unit_info.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_info.c HPL_unit_indxg2l.o : ../HPL_unit_indxg2l.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2l.c HPL_chek_indxg2l.o : ../HPL_chek_indxg2l.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2l.c HPL_unit_indxg2p.o : ../HPL_unit_indxg2p.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxg2p.c HPL_chek_indxg2p.o : ../HPL_chek_indxg2p.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxg2p.c HPL_unit_indxl2g.o : ../HPL_unit_indxl2g.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_indxl2g.c HPL_chek_indxl2g.o : ../HPL_chek_indxl2g.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_indxl2g.c HPL_unit_numroc.o : ../HPL_unit_numroc.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) 
../HPL_unit_numroc.c HPL_unit_numrocI.o : ../HPL_unit_numrocI.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_unit_numrocI.c HPL_chek_numrocI.o : ../HPL_chek_numrocI.c $(INCdep) $(CC) -o $@ -c $(CCFLAGS) ../HPL_chek_numrocI.c # # ###################################################################### # clean : $(RM) *.o *.grd # # ###################################################################### hpcc-1.4.1/hpl/man/man3/HPL_abort.30000644000000000000000000000230511256503657013425 00000000000000.TH HPL_abort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_abort \- halts execution. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_abort(\fR \fB\&int\fR \fI\&LINE\fR, \fB\&const char *\fR \fI\&SRNAME\fR, \fB\&const char *\fR \fI\&FORM\fR, \fB\&...\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_abort\fR displays an error message on stderr and halts execution. .SH ARGUMENTS .TP 8 LINE (local input) int On entry, LINE specifies the line number in the file where the error has occurred. When LINE is not a positive line number, it is ignored. .TP 8 SRNAME (local input) const char * On entry, SRNAME should be the name of the routine calling this error handler. .TP 8 FORM (local input) const char * On entry, FORM specifies the format, i.e., how the subsequent arguments are converted for output. .TP 8 (local input) ... On entry, ... is the list of arguments to be printed within the format string. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br HPL_abort( __LINE__, __FILE__, "Halt.\en" ); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_fprintf \ (3), .BR HPL_warn \ (3). hpcc-1.4.1/hpl/man/man3/HPL_all_reduce.30000644000000000000000000000252011256503657014414 00000000000000.TH HPL_all_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_all_reduce \- All reduce operation. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_all_reduce(\fR \fB\&void *\fR \fI\&BUFFER\fR, \fB\&const int\fR \fI\&COUNT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR, \fB\&const HPL_T_OP \fR \fI\&OP\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_all_reduce\fR performs a global reduce operation across all processes of a group leaving the results on all processes. .SH ARGUMENTS .TP 8 BUFFER (local input/global output) void * On entry, BUFFER points to the buffer to be combined. On exit, this array contains the combined data and is identical on all processes in the group. .TP 8 COUNT (global input) const int On entry, COUNT indicates the number of entries in BUFFER. COUNT must be at least zero. .TP 8 DTYPE (global input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffers operands. .TP 8 OP (global input) const HPL_T_OP On entry, OP is a pointer to the local combine function. .TP 8 COMM (global/local input) MPI_Comm The MPI communicator identifying the process collection. .SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_min \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_barrier.30000644000000000000000000000124111256503657013742 00000000000000.TH HPL_barrier 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_barrier \- Barrier operation. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_barrier(\fR \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_barrier\fR blocks the caller until all process members have called it. The call returns at any process only after all group members have entered the call. .SH ARGUMENTS .TP 8 COMM (global/local input) MPI_Comm The MPI communicator identifying the process collection. .SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_reduce \ (3), .BR HPL_all_reduce \ (3), .BR HPL_min \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_bcast.30000644000000000000000000000156311256503657013417 00000000000000.TH HPL_bcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_bcast \- Perform the row broadcast. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_bcast(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&int *\fR \fI\&IFLAG\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_bcast\fR broadcasts the current panel. Successful completion is indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was not completed, in which case this function should be called again. .SH ARGUMENTS .TP 8 PANEL (input/output) HPL_T_panel * On entry, PANEL points to the current panel data structure being broadcast. .TP 8 IFLAG (output) int * On exit, IFLAG indicates whether or not the broadcast has occurred. .SH SEE ALSO .BR HPL_binit \ (3), .BR HPL_bwait \ (3). hpcc-1.4.1/hpl/man/man3/HPL_binit.30000644000000000000000000000111111256503657013415 00000000000000.TH HPL_binit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_binit \- Initialize the row broadcast. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_binit(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_binit\fR initializes a row broadcast. Successful completion is indicated by the returned error code HPL_SUCCESS. .SH ARGUMENTS .TP 8 PANEL (input/output) HPL_T_panel * On entry, PANEL points to the current panel data structure being broadcast. .SH SEE ALSO .BR HPL_bcast \ (3), .BR HPL_bwait \ (3). hpcc-1.4.1/hpl/man/man3/HPL_broadcast.30000644000000000000000000000244411256503657014264 00000000000000.TH HPL_broadcast 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_broadcast \- Broadcast operation. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_broadcast(\fR \fB\&void *\fR \fI\&BUFFER\fR, \fB\&const int\fR \fI\&COUNT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR, \fB\&const int\fR \fI\&ROOT\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_broadcast\fR broadcasts a message from the process with rank ROOT to all processes in the group. .SH ARGUMENTS .TP 8 BUFFER (local input/output) void * On entry, BUFFER points to the buffer to be broadcast. On exit, this array contains the broadcast data and is identical on all processes in the group. .TP 8 COUNT (global input) const int On entry, COUNT indicates the number of entries in BUFFER. COUNT must be at least zero. .TP 8 DTYPE (global input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffers operands. .TP 8 ROOT (global input) const int On entry, ROOT is the coordinate of the source process. .TP 8 COMM (global/local input) MPI_Comm The MPI communicator identifying the process collection. .SH SEE ALSO .BR HPL_reduce \ (3), .BR HPL_all_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_min \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_bwait.30000644000000000000000000000116711256503657013431 00000000000000.TH HPL_bwait 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_bwait \- Finalize the row broadcast. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_bwait(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_bwait\fR waits for the row broadcast of the current panel to terminate. Successful completion is indicated by the returned error code HPL_SUCCESS. .SH ARGUMENTS .TP 8 PANEL (input/output) HPL_T_panel * On entry, PANEL points to the current panel data structure being broadcast. .SH SEE ALSO .BR HPL_binit \ (3), .BR HPL_bcast \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_copyL.30000644000000000000000000000144111256503657013404 00000000000000.TH HPL_copyL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_copyL \- Copy the current panel into a contiguous workspace. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_copyL(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_copyL\fR copies the panel of columns, the L1 replicated submatrix, the pivot array and the info scalar into a contiguous workspace for later broadcast. The copy of this panel into a contiguous buffer can be enforced by specifying -DHPL_COPY_L in the architecture specific Makefile. .SH ARGUMENTS .TP 8 PANEL (input/output) HPL_T_panel * On entry, PANEL points to the current panel data structure being broadcast. .SH SEE ALSO .BR HPL_binit \ (3), .BR HPL_bcast \ (3), .BR HPL_bwait \ (3). hpcc-1.4.1/hpl/man/man3/HPL_daxpy.30000644000000000000000000000361611256503657013451 00000000000000.TH HPL_daxpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_daxpy \- y := y + alpha * x. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_daxpy(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&const double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR, \fB\&double *\fR \fI\&Y\fR, \fB\&const int\fR \fI\&INCY\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_daxpy\fR scales the vector x by alpha and adds it to y. .SH ARGUMENTS .TP 8 N (local input) const int On entry, N specifies the length of the vectors x and y. N must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero, then the entries of the incremented array X need not be set on input. .TP 8 X (local input) const double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. 
.TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .TP 8 Y (local input/output) double * On entry, Y is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. On exit, the entries of the incremented array Y are updated with the scaled entries of the incremented array X. .TP 8 INCY (local input) const int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double x[3], y[3]; .br x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; .br y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; .br HPL_daxpy( 3, 2.0, x, 1, y, 1 ); .br printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dcopy \ (3), .BR HPL_dscal \ (3), .BR HPL_dswap \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dcopy.30000644000000000000000000000320111256503657013430 00000000000000.TH HPL_dcopy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dcopy \- y := x. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dcopy(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR, \fB\&double *\fR \fI\&Y\fR, \fB\&const int\fR \fI\&INCY\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dcopy\fR copies the vector x into the vector y. .SH ARGUMENTS .TP 8 N (local input) const int On entry, N specifies the length of the vectors x and y. N must be at least zero. .TP 8 X (local input) const double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .TP 8 Y (local input/output) double * On entry, Y is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. 
On exit, the entries of the incremented array Y are updated with the entries of the incremented array X. .TP 8 INCY (local input) const int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double x[3], y[3]; .br x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; .br y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; .br HPL_dcopy( 3, x, 1, y, 1 ); .br printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_daxpy \ (3), .BR HPL_dscal \ (3), .BR HPL_dswap \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dgemm.30000644000000000000000000001230611256503657013411 00000000000000.TH HPL_dgemm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dgemm \- C := alpha * op(A) * op(B) + beta * C. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dgemm(\fR \fB\&const enum HPL_ORDER\fR \fI\&ORDER\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANSA\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANSB\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&K\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&B\fR, \fB\&const int\fR \fI\&LDB\fR, \fB\&const double\fR \fI\&BETA\fR, \fB\&double *\fR \fI\&C\fR, \fB\&const int\fR \fI\&LDC\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dgemm\fR performs one of the matrix-matrix operations C := alpha * op( A ) * op( B ) + beta * C where op( X ) is one of op( X ) = X or op( X ) = X^T. Alpha and beta are scalars, and A, B and C are matrices, with op(A) an m by k matrix, op(B) a k by n matrix and C an m by n matrix. .SH ARGUMENTS .TP 8 ORDER (local input) const enum HPL_ORDER On entry, ORDER specifies the storage format of the operands as follows: ORDER = HplRowMajor, ORDER = HplColumnMajor. 
.TP 8 TRANSA (local input) const enum HPL_TRANS On entry, TRANSA specifies the form of op(A) to be used in the matrix-matrix operation as follows: TRANSA==HplNoTrans : op( A ) = A, TRANSA==HplTrans : op( A ) = A^T, TRANSA==HplConjTrans : op( A ) = A^T. .TP 8 TRANSB (local input) const enum HPL_TRANS On entry, TRANSB specifies the form of op(B) to be used in the matrix-matrix operation as follows: TRANSB==HplNoTrans : op( B ) = B, TRANSB==HplTrans : op( B ) = B^T, TRANSB==HplConjTrans : op( B ) = B^T. .TP 8 M (local input) const int On entry, M specifies the number of rows of the matrix op(A) and of the matrix C. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the matrix op(B) and the number of columns of the matrix C. N must be at least zero. .TP 8 K (local input) const int On entry, K specifies the number of columns of the matrix op(A) and the number of rows of the matrix op(B). K must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero then the elements of the matrices A and B need not be set on input. .TP 8 A (local input) const double * On entry, A is an array of dimension (LDA,ka), where ka is k when TRANSA==HplNoTrans, and is m otherwise. Before entry with TRANSA==HplNoTrans, the leading m by k part of the array A must contain the matrix A, otherwise the leading k by m part of the array A must contain the matrix A. .TP 8 LDA (local input) const int On entry, LDA specifies the first dimension of A as declared in the calling (sub) program. When TRANSA==HplNoTrans then LDA must be at least max(1,m), otherwise LDA must be at least max(1,k). .TP 8 B (local input) const double * On entry, B is an array of dimension (LDB,kb), where kb is n when TRANSB==HplNoTrans, and is k otherwise. 
Before entry with TRANSB==HplNoTrans, the leading k by n part of the array B must contain the matrix B, otherwise the leading n by k part of the array B must contain the matrix B. .TP 8 LDB (local input) const int On entry, LDB specifies the first dimension of B as declared in the calling (sub) program. When TRANSB==HplNoTrans then LDB must be at least max(1,k), otherwise LDB must be at least max(1,n). .TP 8 BETA (local input) const double On entry, BETA specifies the scalar beta. When BETA is supplied as zero then the elements of the matrix C need not be set on input. .TP 8 C (local input/output) double * On entry, C is an array of dimension (LDC,n). Before entry, the leading m by n part of the array C must contain the matrix C, except when beta is zero, in which case C need not be set on entry. On exit, the array C is overwritten by the m by n matrix ( alpha*op( A )*op( B ) + beta*C ). .TP 8 LDC (local input) const int On entry, LDC specifies the first dimension of C as declared in the calling (sub) program. LDC must be at least max(1,m). .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], b[2*2], c[2*2]; .br a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; .br b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; .br c[0] = 4.0; c[1] = 3.0; c[2] = 2.0; c[3] = 1.0; .br HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, .br 2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 ); .br printf(" [%f,%f]\en", c[0], c[2]); .br printf("c=[%f,%f]\en", c[1], c[3]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dtrsm \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dgemv.30000644000000000000000000000713011256503657013421 00000000000000.TH HPL_dgemv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dgemv \- y := beta * y + alpha * op(A) * x. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dgemv(\fR \fB\&const enum HPL_ORDER\fR \fI\&ORDER\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANS\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR, \fB\&const double\fR \fI\&BETA\fR, \fB\&double *\fR \fI\&Y\fR, \fB\&const int\fR \fI\&INCY\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dgemv\fR performs one of the matrix-vector operations y := alpha * op( A ) * x + beta * y, where op( X ) is one of op( X ) = X or op( X ) = X^T. where alpha and beta are scalars, x and y are vectors and A is an m by n matrix. .SH ARGUMENTS .TP 8 ORDER (local input) const enum HPL_ORDER On entry, ORDER specifies the storage format of the operands as follows: ORDER = HplRowMajor, ORDER = HplColumnMajor. .TP 8 TRANS (local input) const enum HPL_TRANS On entry, TRANS specifies the operation to be performed as follows: TRANS = HplNoTrans y := alpha*A *x + beta*y, TRANS = HplTrans y := alpha*A^T*x + beta*y. .TP 8 M (local input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero then A and X need not be set on input. .TP 8 A (local input) const double * On entry, A points to an array of size equal to or greater than LDA * n. Before entry, the leading m by n part of the array A must contain the matrix coefficients. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of A as declared in the calling (sub) program. LDA must be at least MAX(1,m). 
.TP 8 X (local input) const double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .TP 8 BETA (local input) const double On entry, BETA specifies the scalar beta. When BETA is supplied as zero then Y need not be set on input. .TP 8 Y (local input/output) double * On entry, Y is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. Before entry with BETA non-zero, the incremented array Y must contain the vector y. On exit, Y is overwritten by the updated vector y. .TP 8 INCY (local input) const int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], x[2], y[2]; .br a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; .br x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; .br HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0, .br a, 2, x, 1, -1.0, y, 1 ); .br printf("y=[%f,%f]\en", y[0], y[1]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dger \ (3), .BR HPL_dtrsv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dger.30000644000000000000000000000565411256503657013251 00000000000000.TH HPL_dger 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dger \- A := alpha * x * y^T + A. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dger(\fR \fB\&const enum HPL_ORDER\fR \fI\&ORDER\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&const double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR, \fB\&double *\fR \fI\&Y\fR, \fB\&const int\fR \fI\&INCY\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dger\fR performs the rank 1 operation A := alpha * x * y^T + A, where alpha is a scalar, x is an m-element vector, y is an n-element vector and A is an m by n matrix. .SH ARGUMENTS .TP 8 ORDER (local input) const enum HPL_ORDER On entry, ORDER specifies the storage format of the operands as follows: ORDER = HplRowMajor, ORDER = HplColumnMajor. .TP 8 M (local input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero then X and Y need not be set on input. .TP 8 X (local input) const double * On entry, X is an incremented array of dimension at least ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .TP 8 Y (local input) double * On entry, Y is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. .TP 8 INCY (local input) const int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. .TP 8 A (local input/output) double * On entry, A points to an array of size equal to or greater than LDA * n. Before entry, the leading m by n part of the array A must contain the matrix coefficients. On exit, A is overwritten by the updated matrix. 
.TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of A as declared in the calling (sub) program. LDA must be at least MAX(1,m). .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], x[2], y[2]; .br a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0; .br x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0; .br HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1, .br a, 2 ); .br printf("y=[%f,%f]\en", y[0], y[1]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dgemv \ (3), .BR HPL_dtrsv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlacpy.30000644000000000000000000000312211256503657013570 00000000000000.TH HPL_dlacpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlacpy \- B := A. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlacpy(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&B\fR, \fB\&const int\fR \fI\&LDB\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlacpy\fR copies an array A into an array B. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of the arrays A and B. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the arrays A and B. N must be at least zero. .TP 8 A (local input) const double * On entry, A points to an array of dimension (LDA,N). .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 B (local output) double * On entry, B points to an array of dimension (LDB,N). On exit, B is overwritten with A. .TP 8 LDB (local input) const int On entry, LDB specifies the leading dimension of the array B. LDB must be at least MAX(1,M). 
.SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], b[2*2]; .br a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; .br HPL_dlacpy( 2, 2, a, 2, b, 2 ); .br printf(" [%f,%f]\en", b[0], b[2]); .br printf("b=[%f,%f]\en", b[1], b[3]); .br exit(0); .br return(0); .br } .SH SEE ALSO .BR HPL_dlatcpy \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlamch.30000644000000000000000000000576111256503657013557 00000000000000.TH HPL_dlamch 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlamch \- determines machine-specific arithmetic constants. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_dlamch(\fR \fB\&const HPL_T_MACH\fR \fI\&CMACH\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlamch\fR determines machine-specific arithmetic constants such as the relative machine precision (eps), the safe minimum (sfmin) such that 1 / sfmin does not overflow, the base of the machine (base), the precision (prec), the number of (base) digits in the mantissa (t), whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the minimum exponent before (gradual) underflow (emin), the underflow threshold (rmin) base**(emin-1), the largest exponent before overflow (emax), the overflow threshold (rmax) (base**emax)*(1-eps). 
.SH ARGUMENTS .TP 8 CMACH (local input) const HPL_T_MACH Specifies the value to be returned by HPL_dlamch = HPL_MACH_EPS, HPL_dlamch := eps (default) = HPL_MACH_SFMIN, HPL_dlamch := sfmin = HPL_MACH_BASE, HPL_dlamch := base = HPL_MACH_PREC, HPL_dlamch := eps*base = HPL_MACH_MLEN, HPL_dlamch := t = HPL_MACH_RND, HPL_dlamch := rnd = HPL_MACH_EMIN, HPL_dlamch := emin = HPL_MACH_RMIN, HPL_dlamch := rmin = HPL_MACH_EMAX, HPL_dlamch := emax = HPL_MACH_RMAX, HPL_dlamch := rmax where eps = relative machine precision, sfmin = safe minimum, base = base of the machine, prec = eps*base, t = number of digits in the mantissa, rnd = 1.0 if rounding occurs in addition, emin = minimum exponent before underflow, rmin = underflow threshold, emax = largest exponent before overflow, rmax = overflow threshold. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double eps; .br eps = HPL_dlamch( HPL_MACH_EPS ); .br printf("eps=%18.8e\en", eps); .br exit(0); return(0); .br } .SH REFERENCES This function has been manually translated from the Fortran 77 LAPACK auxiliary function dlamch.f (version 2.0 -- 1992), that was itself based on the function ENVRON by Malcolm and incorporated suggestions by Gentleman and Marovich. See Malcolm M. A., Algorithms to reveal properties of floating-point arithmetic., Comms. of the ACM, 15, 949-951 (1972). Gentleman W. M. and Marovich S. B., More on algorithms that reveal properties of floating point arithmetic units., Comms. of the ACM, 17, 276-277 (1974). hpcc-1.4.1/hpl/man/man3/HPL_dlange.30000644000000000000000000000365211256503657013556 00000000000000.TH HPL_dlange 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlange \- Compute ||A||. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_dlange(\fR \fB\&const HPL_T_NORM\fR \fI\&NORM\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlange\fR returns the value of the one norm, or the infinity norm, or the element of largest absolute value of a matrix A: max(abs(A(i,j))) when NORM = HPL_NORM_A, norm1(A), when NORM = HPL_NORM_1, normI(A), when NORM = HPL_NORM_I, where norm1 denotes the one norm of a matrix (maximum column sum) and normI denotes the infinity norm of a matrix (maximum row sum). Note that max(abs(A(i,j))) is not a matrix norm. .SH ARGUMENTS .TP 8 NORM (local input) const HPL_T_NORM On entry, NORM specifies the value to be returned by this function as described above. .TP 8 M (local input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 A (local input) const double * On entry, A points to an array of dimension (LDA,N), that contains the matrix A. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,M). .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], norm; .br a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; .br norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 ); .br printf("norm=%f\en", norm); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dlaprnt \ (3), .BR HPL_fprintf \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaprnt.30000644000000000000000000000310411256503657013760 00000000000000.TH HPL_dlaprnt 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaprnt \- Print the matrix A. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaprnt(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&IA\fR, \fB\&const int\fR \fI\&JA\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const char *\fR \fI\&CMATNM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaprnt\fR prints to standard error an M-by-N matrix A. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of A. N must be at least zero. .TP 8 A (local input) double * On entry, A points to an array of dimension (LDA,N). .TP 8 IA (local input) const int On entry, IA specifies the starting row index to be printed. .TP 8 JA (local input) const int On entry, JA specifies the starting column index to be printed. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,M). .TP 8 CMATNM (local input) const char * On entry, CMATNM is the name of the matrix to be printed. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2]; .br a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; .br HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" ); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_fprintf \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp00N.30000644000000000000000000000327211256503657014072 00000000000000.TH HPL_dlaswp00N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp00N \- performs a series of row interchanges. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp00N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const int *\fR \fI\&IPIV\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp00N\fR performs a series of local row interchanges on a matrix A. One row interchange is initiated for rows 0 through M-1 of A. 
.SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of the array A to be interchanged. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the array A. N must be at least zero. .TP 8 A (local input/output) double * On entry, A points to an array of dimension (LDA,N) to which the row interchanges will be applied. On exit, the permuted matrix. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 IPIV (local input) const int * On entry, IPIV is an array of size M that contains the pivoting information. For k in [0..M), IPIV[k]=IROFF + l implies that local rows k and l are to be interchanged. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp01N.30000644000000000000000000000547611256503657014103 00000000000000.TH HPL_dlaswp01N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp01N \- copies rows of A into itself and into U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp01N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp01N\fR copies scattered rows of A into itself and into an array U. The row offsets in A of the source rows are specified by LINDXA. The destination of those rows are specified by LINDXAU. A positive value of LINDXAU indicates that the array destination is U, and A otherwise. 
.SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A that should be moved within A or copied into U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of rows of A that should be moved within A or copied into U. N must be at least zero. .TP 8 A (local input/output) double * On entry, A points to an array of dimension (LDA,N). The rows of this array specified by LINDXA should be moved within A or copied into U. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,N). The rows of A specified by LINDXA are copied within this array U at the positions indicated by positive values of LINDXAU. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,M). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be moved within A or copied into U. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M that contains the local row indexes of U where the rows of A should be copied at. This array also contains the local row offsets in A where some of the rows of A should be moved to. A positive value of LINDXAU[i] indicates that the row LINDXA[i] of A should be copied into U at the position LINDXAU[i]; otherwise the row LINDXA[i] of A should be moved at the position -LINDXAU[i] within A. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlaswp01T.30000644000000000000000000000562011256503657014100 00000000000000.TH HPL_dlaswp01T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp01T \- copies rows of A into itself and into U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp01T(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp01T\fR copies scattered rows of A into itself and into an array U. The row offsets in A of the source rows are specified by LINDXA. The destination of those rows are specified by LINDXAU. A positive value of LINDXAU indicates that the array destination is U, and A otherwise. Rows of A are stored as columns in U. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A that should be moved within A or copied into U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of rows of A that should be moved within A or copied into U. N must be at least zero. .TP 8 A (local input/output) double * On entry, A points to an array of dimension (LDA,N). The rows of this array specified by LINDXA should be moved within A or copied into U. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,M). The rows of A specified by LINDXA are copied within this array U at the positions indicated by positive values of LINDXAU. The rows of A are stored as columns in U. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,N). 
.TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be moved within A or copied into U. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M that contains the local row indexes of U where the rows of A should be copied at. This array also contains the local row offsets in A where some of the rows of A should be moved to. A positive value of LINDXAU[i] indicates that the row LINDXA[i] of A should be copied into U at the position LINDXAU[i]; otherwise the row LINDXA[i] of A should be moved at the position -LINDXAU[i] within A. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp02N.30000644000000000000000000000466211256503657014100 00000000000000.TH HPL_dlaswp02N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp02N \- pack rows of A into columns of W. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp02N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&W0\fR, \fB\&double *\fR \fI\&W\fR, \fB\&const int\fR \fI\&LDW\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp02N\fR packs scattered rows of an array A into workspace W. The row offsets in A are specified by LINDXA. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A that should be copied into W. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of rows of A that should be copied into W. 
N must be at least zero. .TP 8 A (local input) const double * On entry, A points to an array of dimension (LDA,N). The rows of this array specified by LINDXA should be copied into W. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 W0 (local input/output) double * On exit, W0 is an array of size (M-1)*LDW+1, that contains the destination offset in U where the columns of W should be copied. .TP 8 W (local output) double * On entry, W is an array of size (LDW,M). On exit, W contains the rows LINDXA[i] for i in [0..M) of A stored contiguously in W(:,i). .TP 8 LDW (local input) const int On entry, LDW specifies the leading dimension of the array W. LDW must be at least MAX(1,N+1). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be copied into W. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M that contains the local row indexes of U that should be copied into A and replaced by the rows of W. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp03N.30000644000000000000000000000420611256503657014073 00000000000000.TH HPL_dlaswp03N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp03N \- copy rows of W into U. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp03N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const double *\fR \fI\&W0\fR, \fB\&const double *\fR \fI\&W\fR, \fB\&const int\fR \fI\&LDW\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp03N\fR copies columns of W into rows of an array U. The destination in U of these columns contained in W is stored within W0. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of columns of W stored contiguously that should be copied into U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of columns of W stored contiguously that should be copied into U. N must be at least zero. .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,N). Columns of W are copied as rows within this array U at the positions specified in W0. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,M). .TP 8 W0 (local input) const double * On entry, W0 is an array of size (M-1)*LDW+1, that contains the destination offset in U where the columns of W should be copied. .TP 8 W (local input) const double * On entry, W is an array of size (LDW,M), that contains data to be copied into U. For i in [0..M), entries W(:,i) should be copied into the row or column W0(i*LDW) of U. .TP 8 LDW (local input) const int On entry, LDW specifies the leading dimension of the array W. LDW must be at least MAX(1,N+1). .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlaswp03T.30000644000000000000000000000416311256503657014103 00000000000000.TH HPL_dlaswp03T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp03T \- copy columns of W into U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp03T(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const double *\fR \fI\&W0\fR, \fB\&const double *\fR \fI\&W\fR, \fB\&const int\fR \fI\&LDW\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp03T\fR copies columns of W into an array U. The destination in U of these columns contained in W is stored within W0. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of columns of W stored contiguously that should be copied into U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of columns of W stored contiguously that should be copied into U. N must be at least zero. .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,M). Columns of W are copied within the array U at the positions specified in W0. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,N). .TP 8 W0 (local input) const double * On entry, W0 is an array of size (M-1)*LDW+1, that contains the destination offset in U where the columns of W should be copied. .TP 8 W (local input) const double * On entry, W is an array of size (LDW,M), that contains data to be copied into U. For i in [0..M), entries W(:,i) should be copied into the row or column W0(i*LDW) of U. .TP 8 LDW (local input) const int On entry, LDW specifies the leading dimension of the array W. LDW must be at least MAX(1,N+1). 
.SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp04N.30000644000000000000000000000622411256503657014076 00000000000000.TH HPL_dlaswp04N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp04N \- copy rows of U in A and replace them with columns of W. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp04N(\fR \fB\&const int\fR \fI\&M0\fR, \fB\&const int\fR \fI\&M1\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&W0\fR, \fB\&const double *\fR \fI\&W\fR, \fB\&const int\fR \fI\&LDW\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp04N\fR copies M0 rows of U into A and replaces those rows of U with columns of W. In addition M1 - M0 columns of W are copied into rows of U. .SH ARGUMENTS .TP 8 M0 (local input) const int On entry, M0 specifies the number of rows of U that should be copied into A and replaced by columns of W. M0 must be at least zero. .TP 8 M1 (local input) const int On entry, M1 specifies the number of columns of W that should be copied into rows of U. M1 must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the rows of U that should be copied into A. N must be at least zero. .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,N). This array contains the rows that are to be copied into A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,M1). 
.TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by rows of U indicated by LINDXAU. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M0). .TP 8 W0 (local input) const double * On entry, W0 is an array of size (M-1)*LDW+1, that contains the destination offset in U where the columns of W should be copied. .TP 8 W (local input) const double * On entry, W is an array of size (LDW,M0+M1), that contains data to be copied into U. For i in [M0..M0+M1), the entries W(:,i) are copied into the row W0(i*LDW) of U. .TP 8 LDW (local input) const int On entry, LDW specifies the leading dimension of the array W. LDW must be at least MAX(1,N+1). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M0 containing the local row indexes A into which rows of U are copied. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M0 that contains the local row indexes of U that should be copied into A and replaced by the columns of W. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp04T.30000644000000000000000000000630011256503657014077 00000000000000.TH HPL_dlaswp04T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp04T \- copy columns of U in rows of A and replace them with columns of W. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp04T(\fR \fB\&const int\fR \fI\&M0\fR, \fB\&const int\fR \fI\&M1\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&W0\fR, \fB\&const double *\fR \fI\&W\fR, \fB\&const int\fR \fI\&LDW\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp04T\fR copies M0 columns of U into rows of A and replaces those columns of U with columns of W. In addition M1 - M0 columns of W are copied into U. .SH ARGUMENTS .TP 8 M0 (local input) const int On entry, M0 specifies the number of columns of U that should be copied into A and replaced by columns of W. M0 must be at least zero. .TP 8 M1 (local input) const int On entry, M1 specifies the number of columns of W that will be copied into U. M1 must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the columns of U that will be copied into rows of A. N must be at least zero. .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,*). This array contains the columns that are to be copied into rows of A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,N). .TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by columns of U indicated by LINDXAU. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M0). .TP 8 W0 (local input) const double * On entry, W0 is an array of size (M-1)*LDW+1, that contains the destination offset in U where the columns of W should be copied. 
.TP 8 W (local input) const double * On entry, W is an array of size (LDW,M0+M1), that contains data to be copied into U. For i in [M0..M0+M1), the entries W(:,i) are copied into the column W0(i*LDW) of U. .TP 8 LDW (local input) const int On entry, LDW specifies the leading dimension of the array W. LDW must be at least MAX(1,N+1). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M0 containing the local row indexes A into which columns of U are copied. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M0 that contains the local column indexes of U that should be copied into A and replaced by the columns of W. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp05N.30000644000000000000000000000427411256503657014102 00000000000000.TH HPL_dlaswp05N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp05N \- copy rows of U into A. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp05N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp05N\fR copies rows of U of global offset LINDXAU into rows of A at positions indicated by LINDXA. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of U that should be copied into A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the rows of U that should be copied into A. N must be at least zero. 
.TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by rows of U indicated by LINDXAU. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) const double * On entry, U points to an array of dimension (LDU,N). This array contains the rows that are to be copied into A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,M). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be copied from U. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M that contains the local row indexes of U that should be copied in A. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp05T.30000644000000000000000000000433411256503657014105 00000000000000.TH HPL_dlaswp05T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp05T \- copy rows of U into A. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp05T(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR, \fB\&const int *\fR \fI\&LINDXAU\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp05T\fR copies columns of U of global offset LINDXAU into rows of A at positions indicated by LINDXA. 
.SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of columns of U that should be copied into A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the columns of U that will be copied into rows of A. N must be at least zero. .TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by columns of U indicated by LINDXAU. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) const double * On entry, U points to an array of dimension (LDU,*). This array contains the columns that are to be copied into rows of A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,N). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be copied from U. .TP 8 LINDXAU (local input) const int * On entry, LINDXAU is an array of dimension M that contains the local column indexes of U that should be copied in A. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp06N.30000644000000000000000000000375211256503657014103 00000000000000.TH HPL_dlaswp06N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp06N \- swap rows of U with rows of A. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp06N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp06N\fR swaps rows of U with rows of A at positions indicated by LINDXA. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A that should be swapped with rows of U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the rows of A that should be swapped with rows of U. N must be at least zero. .TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by rows or columns of U. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,N). This array contains the rows of U that are to be swapped with rows of A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,M). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be swapped with U. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlaswp06T.30000644000000000000000000000400211256503657014076 00000000000000.TH HPL_dlaswp06T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp06T \- swap rows or columns of U with rows of A. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp06T(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&LINDXA\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp06T\fR swaps columns of U with rows of A at positions indicated by LINDXA. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of A that should be swapped with columns of U. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the length of the rows of A that should be swapped with columns of U. N must be at least zero. .TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,N). On exit, the rows of this array specified by LINDXA are replaced by columns of U. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 U (local input/output) double * On entry, U points to an array of dimension (LDU,*). This array contains the columns of U that are to be swapped with rows of A. .TP 8 LDU (local input) const int On entry, LDU specifies the leading dimension of the array U. LDU must be at least MAX(1,N). .TP 8 LINDXA (local input) const int * On entry, LINDXA is an array of dimension M that contains the local row indexes of A that should be swapped with U. 
.SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlaswp10N.30000644000000000000000000000307111256503657014070 00000000000000.TH HPL_dlaswp10N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlaswp10N \- performs a series of column interchanges. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlaswp10N(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const int *\fR \fI\&IPIV\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlaswp10N\fR performs a sequence of local column interchanges on a matrix A. One column interchange is initiated for columns 0 through N-1 of A. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of the array A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the array A. N must be at least zero. .TP 8 A (local input/output) double * On entry, A points to an array of dimension (LDA,N). This array contains the columns onto which the interchanges should be applied. On exit, A contains the permuted matrix. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,M). .TP 8 IPIV (local input) const int * On entry, IPIV is an array of size N that contains the pivoting information for the column interchanges. .SH SEE ALSO .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05N \ (3), .BR HPL_dlaswp05T \ (3), .BR HPL_dlaswp06N \ (3), .BR HPL_dlaswp06T \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlatcpy.30000644000000000000000000000323711256503657013763 00000000000000.TH HPL_dlatcpy 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlatcpy \- B := A^T .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlatcpy(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&B\fR, \fB\&const int\fR \fI\&LDB\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlatcpy\fR copies the transpose of an array A into an array B. .SH ARGUMENTS .TP 8 M (local input) const int On entry, M specifies the number of rows of the array B and the number of columns of A. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of rows of the array A and the number of columns of B. N must be at least zero. .TP 8 A (local input) const double * On entry, A points to an array of dimension (LDA,M). .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least MAX(1,N). .TP 8 B (local output) double * On entry, B points to an array of dimension (LDB,N). On exit, B is overwritten with the transpose of A. .TP 8 LDB (local input) const int On entry, LDB specifies the leading dimension of the array B. LDB must be at least MAX(1,M). .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], b[2*2]; .br a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0; .br HPL_dlatcpy( 2, 2, a, 2, b, 2 ); .br printf(" [%f,%f]\en", b[0], b[2]); .br printf("b=[%f,%f]\en", b[1], b[3]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dlacpy \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dlocmax.30000644000000000000000000000422511256503657013750 00000000000000.TH HPL_dlocmax 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlocmax \- finds the maximum entry in matrix column.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlocmax(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&II\fR, \fB\&const int\fR \fI\&JJ\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlocmax\fR finds the maximum entry in the current column and packs the useful information in WORK[0:3]. On exit, WORK[0] contains the local maximum absolute value scalar, WORK[1] is the corresponding local row index, WORK[2] is the corresponding global row index, and WORK[3] is the coordinate of the process owning this max. When N is less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set to the total number of process rows. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 N (local input) const int On entry, N specifies the local number of rows of the column of A on which we operate. .TP 8 II (local input) const int On entry, II specifies the row offset where the column to be operated on starts with respect to the panel. .TP 8 JJ (local input) const int On entry, JJ specifies the column offset where the column to be operated on starts with respect to the panel. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 4. On exit, WORK[0] contains the local maximum absolute value scalar, WORK[1] contains the corresponding local row index, WORK[2] contains the corresponding global row index, and WORK[3] is the coordinate of process owning this max. .SH SEE ALSO .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlocswpN.30000644000000000000000000000361511256503657014114 00000000000000.TH HPL_dlocswpN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlocswpN \- locally swaps rows within panel. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlocswpN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&II\fR, \fB\&const int\fR \fI\&JJ\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlocswpN\fR performs the local swapping operations within a panel. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 II (local input) const int On entry, II specifies the row offset where the column to be operated on starts with respect to the panel. .TP 8 JJ (local input) const int On entry, JJ specifies the column offset where the column to be operated on starts with respect to the panel. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2 * (4+2*N0). WORK[0] contains the local maximum absolute value scalar, WORK[1] contains the corresponding local row index, WORK[2] contains the corresponding global row index, and WORK[3] is the coordinate of process owning this max. The N0 length max row is stored in WORK[4:4+N0-1]; Note that this is also the JJth row (or column) of L1. The remaining part of this array is used as workspace. .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dlocswpT.30000644000000000000000000000354111256503657014120 00000000000000.TH HPL_dlocswpT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dlocswpT \- locally swaps rows within panel. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dlocswpT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&II\fR, \fB\&const int\fR \fI\&JJ\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dlocswpT\fR performs the local swapping operations within a panel. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 II (local input) const int On entry, II specifies the row offset where the column to be operated on starts with respect to the panel. .TP 8 JJ (local input) const int On entry, JJ specifies the column offset where the column to be operated on starts with respect to the panel. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2 * (4+2*N0). WORK[0] contains the local maximum absolute value scalar, WORK[1] contains the corresponding local row index, WORK[2] contains the corresponding global row index, and WORK[3] is the coordinate of process owning this max. The N0 length max row is stored in WORK[4:4+N0-1]; Note that this is also the JJth row (or column) of L1. The remaining part of this array is used as workspace. .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_dmatgen.30000644000000000000000000000275311256503657013744 00000000000000.TH HPL_dmatgen 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dmatgen \- random matrix generator. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dmatgen(\fR \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const int\fR \fI\&ISEED\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dmatgen\fR generates (or regenerates) a random matrix A. The pseudo-random generator uses the linear congruential algorithm: X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer Programming, Knuth 1973, Vol. 2. .SH ARGUMENTS .TP 8 M (input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 A (output) double * On entry, A points to an array of dimension (LDA,N). On exit, this array contains the coefficients of the randomly generated matrix. .TP 8 LDA (input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,M). .TP 8 ISEED (input) const int On entry, ISEED specifies the seed number to generate the matrix A. ISEED must be at least zero. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3), .BR HPL_rand \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dscal.30000644000000000000000000000264211256503657013410 00000000000000.TH HPL_dscal 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dscal \- x = alpha * x. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dscal(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dscal\fR scales the vector x by alpha. 
.SH ARGUMENTS .TP 8 N (local input) const int On entry, N specifies the length of the vector x. N must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero, then the entries of the incremented array X need not be set on input. .TP 8 X (local input/output) double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. On exit, the entries of the incremented array X are scaled by the scalar alpha. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double x[3]; .br x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; .br HPL_dscal( 3, 2.0, x, 1 ); .br printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_daxpy \ (3), .BR HPL_dcopy \ (3), .BR HPL_dswap \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dswap.30000644000000000000000000000341111256503657013433 00000000000000.TH HPL_dswap 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dswap \- y <-> x. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dswap(\fR \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR, \fB\&double *\fR \fI\&Y\fR, \fB\&const int\fR \fI\&INCY\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dswap\fR swaps the vectors x and y. .SH ARGUMENTS .TP 8 N (local input) const int On entry, N specifies the length of the vectors x and y. N must be at least zero. .TP 8 X (local input/output) double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. On exit, the entries of the incremented array X are updated with the entries of the incremented array Y. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. 
.TP 8 Y (local input/output) double * On entry, Y is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. On exit, the entries of the incremented array Y are updated with the entries of the incremented array X. .TP 8 INCY (local input) const int On entry, INCY specifies the increment for the elements of Y. INCY must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double x[3], y[3]; .br x[0] = 1.0; x[1] = 2.0; x[2] = 3.0; .br y[0] = 4.0; y[1] = 5.0; y[2] = 6.0; .br HPL_dswap( 3, x, 1, y, 1 ); .br printf("x=[%f,%f,%f]\en", x[0], x[1], x[2]); .br printf("y=[%f,%f,%f]\en", y[0], y[1], y[2]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_daxpy \ (3), .BR HPL_dcopy \ (3), .BR HPL_dscal \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dtrsm.30000644000000000000000000001167111256503657013455 00000000000000.TH HPL_dtrsm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dtrsm \- B := A^{-1} * B or B := B * A^{-1}. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dtrsm(\fR \fB\&const enum HPL_ORDER\fR \fI\&ORDER\fR, \fB\&const enum HPL_SIDE\fR \fI\&SIDE\fR, \fB\&const enum HPL_UPLO\fR \fI\&UPLO\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANS\fR, \fB\&const enum HPL_DIAG\fR \fI\&DIAG\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double\fR \fI\&ALPHA\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&B\fR, \fB\&const int\fR \fI\&LDB\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dtrsm\fR solves one of the matrix equations op( A ) * X = alpha * B, or X * op( A ) = alpha * B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of op( A ) = A or op( A ) = A^T. The matrix X is overwritten on B. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. 
.SH ARGUMENTS .TP 8 ORDER (local input) const enum HPL_ORDER On entry, ORDER specifies the storage format of the operands as follows: ORDER = HplRowMajor, ORDER = HplColumnMajor. .TP 8 SIDE (local input) const enum HPL_SIDE On entry, SIDE specifies whether op(A) appears on the left or right of X as follows: SIDE==HplLeft op( A ) * X = alpha * B, SIDE==HplRight X * op( A ) = alpha * B. .TP 8 UPLO (local input) const enum HPL_UPLO On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced. When UPLO==HplUpper, only the upper triangular part of A is to be referenced, otherwise only the lower triangular part of A is to be referenced. .TP 8 TRANS (local input) const enum HPL_TRANS On entry, TRANS specifies the form of op(A) to be used in the matrix-matrix operation as follows: TRANS==HplNoTrans : op( A ) = A, TRANS==HplTrans : op( A ) = A^T, TRANS==HplConjTrans : op( A ) = A^T. .TP 8 DIAG (local input) const enum HPL_DIAG On entry, DIAG specifies whether A is unit triangular or not. When DIAG==HplUnit, A is assumed to be unit triangular, and otherwise, A is not assumed to be unit triangular. .TP 8 M (local input) const int On entry, M specifies the number of rows of the matrix B. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the number of columns of the matrix B. N must be at least zero. .TP 8 ALPHA (local input) const double On entry, ALPHA specifies the scalar alpha. When ALPHA is supplied as zero then the elements of the matrix B need not be set on input. .TP 8 A (local input) const double * On entry, A points to an array of size equal to or greater than LDA * k, where k is m when SIDE==HplLeft and is n otherwise. Before entry with UPLO==HplUpper, the leading k by k upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced.
When UPLO==HplLower on entry, the leading k by k lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG==HplUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of A as declared in the calling (sub) program. LDA must be at least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. .TP 8 B (local input/output) double * On entry, B points to an array of size equal to or greater than LDB * n. Before entry, the leading m by n part of the array B must contain the matrix B, except when alpha is zero, in which case B need not be set on entry. On exit, the array B is overwritten by the m by n solution matrix. .TP 8 LDB (local input) const int On entry, LDB specifies the leading dimension of B as declared in the calling (sub) program. LDB must be at least MAX(1,m). .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], b[2*2]; .br a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; .br b[0] = 2.0; b[1] = 1.0; b[2] = 1.0; b[3] = 2.0; .br HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, .br HplNoTrans, HplNonUnit, 2, 2, 2.0, .br a, 2, b, 2 ); .br printf(" [%f,%f]\en", b[0], b[2]); .br printf("b=[%f,%f]\en", b[1], b[3]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dgemm \ (3). hpcc-1.4.1/hpl/man/man3/HPL_dtrsv.30000644000000000000000000000732611256503657013470 00000000000000.TH HPL_dtrsv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_dtrsv \- x := A^{-1} x.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_dtrsv(\fR \fB\&const enum HPL_ORDER\fR \fI\&ORDER\fR, \fB\&const enum HPL_UPLO\fR \fI\&UPLO\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANS\fR, \fB\&const enum HPL_DIAG\fR \fI\&DIAG\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_dtrsv\fR solves one of the systems of equations A * x = b, or A^T * x = b, where b and x are n-element vectors and A is an n by n non-unit, or unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine. .SH ARGUMENTS .TP 8 ORDER (local input) const enum HPL_ORDER On entry, ORDER specifies the storage format of the operands as follows: ORDER = HplRowMajor, ORDER = HplColumnMajor. .TP 8 UPLO (local input) const enum HPL_UPLO On entry, UPLO specifies whether the upper or lower triangular part of the array A is to be referenced. When UPLO==HplUpper, only the upper triangular part of A is to be referenced, otherwise only the lower triangular part of A is to be referenced. .TP 8 TRANS (local input) const enum HPL_TRANS On entry, TRANS specifies the equations to be solved as follows: TRANS==HplNoTrans A * x = b, TRANS==HplTrans A^T * x = b. .TP 8 DIAG (local input) const enum HPL_DIAG On entry, DIAG specifies whether A is unit triangular or not. When DIAG==HplUnit, A is assumed to be unit triangular, and otherwise, A is not assumed to be unit triangular. .TP 8 N (local input) const int On entry, N specifies the order of the matrix A. N must be at least zero. .TP 8 A (local input) const double * On entry, A points to an array of size equal to or greater than LDA * n. 
Before entry with UPLO==HplUpper, the leading n by n upper triangular part of the array A must contain the upper triangular matrix and the strictly lower triangular part of A is not referenced. When UPLO==HplLower on entry, the leading n by n lower triangular part of the array A must contain the lower triangular matrix and the strictly upper triangular part of A is not referenced. Note that when DIAG==HplUnit, the diagonal elements of A are not referenced either, but are assumed to be unity. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of A as declared in the calling (sub) program. LDA must be at least MAX(1,n). .TP 8 X (local input/output) double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. Before entry, the incremented array X must contain the n element right-hand side vector b. On exit, X is overwritten with the solution vector x. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double a[2*2], x[2]; .br a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0; .br x[0] = 2.0; x[1] = 1.0; .br HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, .br HplNonUnit, 2, a, 2, x, 1 ); .br printf("x=[%f,%f]\en", x[0], x[1]); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_dger \ (3), .BR HPL_dgemv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_equil.30000644000000000000000000000551111256503657013437 00000000000000.TH HPL_equil 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_equil \- Equilibrate U and forward the column panel L.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_equil(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const enum HPL_TRANS\fR \fI\&TRANS\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&int *\fR \fI\&IPLEN\fR, \fB\&const int *\fR \fI\&IPMAP\fR, \fB\&const int *\fR \fI\&IPMAPM1\fR, \fB\&int *\fR \fI\&IWORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_equil\fR equilibrates the local pieces of U, so that on exit to this function, pieces of U contained in every process row are of the same size. This phase makes the rolling phase optimal. In addition, this function probes for the column panel L and forwards it when possible. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be equilibrated) information. .TP 8 TRANS (global input) const enum HPL_TRANS On entry, TRANS specifies whether U is stored in transposed or non-transposed form. .TP 8 N (local input) const int On entry, N specifies the number of rows or columns of U. N must be at least 0. .TP 8 U (local input/output) double * On entry, U is an array of dimension (LDU,*) containing the local pieces of U in each process row. .TP 8 LDU (local input) const int On entry, LDU specifies the local leading dimension of U. LDU should be at least MAX(1,IPLEN[nprow]) when U is stored in non-transposed form, and MAX(1,N) otherwise. .TP 8 IPLEN (global input) int * On entry, IPLEN is an array of dimension NPROW+1. 
This array is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U in process IPMAP[i]. .TP 8 IPMAP (global input) const int * On entry, IPMAP is an array of dimension NPROW. This array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the absolute coordinate of the sorted process. .TP 8 IPMAPM1 (global input) const int * On entry, IPMAPM1 is an array of dimension NPROW. This array contains the inverse of the logarithmic mapping contained in IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. .TP 8 IWORK (workspace) int * On entry, IWORK is a workarray of dimension NPROW+1. .SH SEE ALSO .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_fprintf.30000644000000000000000000000171111256503657013766 00000000000000.TH HPL_fprintf 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_fprintf \- fprintf + fflush wrapper. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_fprintf(\fR \fB\&FILE *\fR \fI\&STREAM\fR, \fB\&const char *\fR \fI\&FORM\fR, \fB\&...\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_fprintf\fR is a wrapper around fprintf flushing the output stream. .SH ARGUMENTS .TP 8 STREAM (local input) FILE * On entry, STREAM specifies the output stream. .TP 8 FORM (local input) const char * On entry, FORM specifies the format, i.e., how the subsequent arguments are converted for output. .TP 8 (local input) ... On entry, ... is the list of arguments to be printed within the format string. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br HPL_fprintf( stdout, "Hello World.\en" ); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_abort \ (3), .BR HPL_warn \ (3). hpcc-1.4.1/hpl/man/man3/HPL_grid_exit.30000644000000000000000000000127111256503657014275 00000000000000.TH HPL_grid_exit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_grid_exit \- Exit process grid. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_grid_exit(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_grid_exit\fR marks the process grid object for deallocation. The returned error code MPI_SUCCESS indicates successful completion. Other error codes are (MPI) implementation dependent. .SH ARGUMENTS .TP 8 GRID (local input/output) HPL_T_grid * On entry, GRID points to the data structure containing the process grid to be released. .SH SEE ALSO .BR HPL_pnum \ (3), .BR HPL_grid_init \ (3), .BR HPL_grid_info \ (3). hpcc-1.4.1/hpl/man/man3/HPL_grid_info.30000644000000000000000000000271611256503657014264 00000000000000.TH HPL_grid_info 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_grid_info \- Retrieve grid information. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_grid_info(\fR \fB\&const HPL_T_grid *\fR \fI\&GRID\fR, \fB\&int *\fR \fI\&NPROW\fR, \fB\&int *\fR \fI\&NPCOL\fR, \fB\&int *\fR \fI\&MYROW\fR, \fB\&int *\fR \fI\&MYCOL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_grid_info\fR returns the grid shape and the coordinates in the grid of the calling process. Successful completion is indicated by the returned error code MPI_SUCCESS. Other error codes depend on the MPI implementation. .SH ARGUMENTS .TP 8 GRID (local input) const HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 NPROW (global output) int * On exit, NPROW specifies the number of process rows in the grid. NPROW is at least one. .TP 8 NPCOL (global output) int * On exit, NPCOL specifies the number of process columns in the grid. NPCOL is at least one. .TP 8 MYROW (global output) int * On exit, MYROW specifies my row process coordinate in the grid. MYROW is greater than or equal to zero and less than NPROW. .TP 8 MYCOL (global output) int * On exit, MYCOL specifies my column process coordinate in the grid. MYCOL is greater than or equal to zero and less than NPCOL. 
.SH SEE ALSO .BR HPL_pnum \ (3), .BR HPL_grid_init \ (3), .BR HPL_grid_exit \ (3). hpcc-1.4.1/hpl/man/man3/HPL_grid_init.30000644000000000000000000000342411256503657014271 00000000000000.TH HPL_grid_init 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_grid_init \- Create a process grid. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_grid_init(\fR \fB\&MPI_Comm\fR \fI\&COMM\fR, \fB\&const HPL_T_ORDER\fR \fI\&ORDER\fR, \fB\&const int\fR \fI\&NPROW\fR, \fB\&const int\fR \fI\&NPCOL\fR, \fB\&HPL_T_grid *\fR \fI\&GRID\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_grid_init\fR creates a NPROW x NPCOL process grid using column- or row-major ordering from an initial collection of processes identified by an MPI communicator. Successful completion is indicated by the returned error code MPI_SUCCESS. Other error codes depend on the MPI implementation. The coordinates of processes that are not part of the grid are set to values outside of [0..NPROW) x [0..NPCOL). .SH ARGUMENTS .TP 8 COMM (global/local input) MPI_Comm On entry, COMM is the MPI communicator identifying the initial collection of processes out of which the grid is formed. .TP 8 ORDER (global input) const HPL_T_ORDER On entry, ORDER specifies how the processes should be ordered in the grid as follows: ORDER = HPL_ROW_MAJOR row-major ordering; ORDER = HPL_COLUMN_MAJOR column-major ordering; .TP 8 NPROW (global input) const int On entry, NPROW specifies the number of process rows in the grid to be created. NPROW must be at least one. .TP 8 NPCOL (global input) const int On entry, NPCOL specifies the number of process columns in the grid to be created. NPCOL must be at least one. .TP 8 GRID (local input/output) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information to be initialized. .SH SEE ALSO .BR HPL_pnum \ (3), .BR HPL_grid_info \ (3), .BR HPL_grid_exit \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_idamax.30000644000000000000000000000234111256503657013561 00000000000000.TH HPL_idamax 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_idamax \- 1st k s.t. |x_k| = max_i(|x_i|). .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_idamax(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const double *\fR \fI\&X\fR, \fB\&const int\fR \fI\&INCX\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_idamax\fR returns the index in an n-vector x of the first element having maximum absolute value. .SH ARGUMENTS .TP 8 N (local input) const int On entry, N specifies the length of the vector x. N must be at least zero. .TP 8 X (local input) const double * On entry, X is an incremented array of dimension at least ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. .TP 8 INCX (local input) const int On entry, INCX specifies the increment for the elements of X. INCX must not be zero. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br double x[3]; .br int imax; .br x[0] = 1.0; x[1] = 3.0; x[2] = 2.0; .br imax = HPL_idamax( 3, x, 1 ); .br printf("imax=%d\en", imax); .br exit(0); .br return(0); .br } .SH SEE ALSO .BR HPL_daxpy \ (3), .BR HPL_dcopy \ (3), .BR HPL_dscal \ (3), .BR HPL_dswap \ (3). hpcc-1.4.1/hpl/man/man3/HPL_indxg2l.30000644000000000000000000000303211256503657013663 00000000000000.TH HPL_indxg2l 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_indxg2l \- Map a global index into a local one. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_indxg2l(\fR \fB\&const int\fR \fI\&IG\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_indxg2l\fR computes the local index of a matrix entry pointed to by the global index IG. This local returned index is the same in all processes. 
.SH ARGUMENTS .TP 8 IG (input) const int On entry, IG specifies the global index of the matrix entry. IG must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix. NB must be larger than one. .TP 8 SRCPROC (input) const int On entry, if SRCPROC = -1, the data is not distributed but replicated, in which case this routine returns IG in all processes. Otherwise, the value of SRCPROC is ignored. .TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2lp \ (3), .BR HPL_indxg2p \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numroc \ (3), .BR HPL_numrocI \ (3). hpcc-1.4.1/hpl/man/man3/HPL_indxg2lp.30000644000000000000000000000370711256503657014054 00000000000000.TH HPL_indxg2lp 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_indxg2lp \- Map a global index into a local one. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_indxg2lp(\fR \fB\&int *\fR \fI\&IL\fR, \fB\&int *\fR \fI\&PROC\fR, \fB\&const int\fR \fI\&IG\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_indxg2lp\fR computes the local index of a matrix entry pointed to by the global index IG as well as the process coordinate which possesses this entry. The local returned index is the same in all processes. .SH ARGUMENTS .TP 8 IL (output) int * On exit, IL specifies the local index corresponding to IG. IL is at least zero. .TP 8 PROC (output) int * On exit, PROC is the coordinate of the process owning the entry specified by the global index IG. PROC is at least zero and less than NPROCS.
.TP 8 IG (input) const int On entry, IG specifies the global index of the matrix entry. IG must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one. .TP 8 SRCPROC (input) const int On entry, if SRCPROC = -1, the data is not distributed but replicated, in which case this routine returns IG in all processes. Otherwise, the value of SRCPROC is ignored. .TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2p \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numroc \ (3), .BR HPL_numrocI \ (3). hpcc-1.4.1/hpl/man/man3/HPL_indxg2p.30000644000000000000000000000277511256503657013704 00000000000000.TH HPL_indxg2p 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_indxg2p \- Map a global index into a process coordinate. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_indxg2p(\fR \fB\&const int\fR \fI\&IG\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_indxg2p\fR computes the process coordinate which possesses the entry of a matrix specified by a global index IG. .SH ARGUMENTS .TP 8 IG (input) const int On entry, IG specifies the global index of the matrix entry. IG must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one.
.TP 8 SRCPROC (input) const int On entry, SRCPROC specifies the coordinate of the process that possesses the first row or column of the matrix. SRCPROC must be at least zero and strictly less than NPROCS. .TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2lp \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numroc \ (3), .BR HPL_numrocI \ (3). hpcc-1.4.1/hpl/man/man3/HPL_indxl2g.30000644000000000000000000000340211256503657013664 00000000000000.TH HPL_indxl2g 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_indxl2g \- Map an index-process pair into a global index. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_indxl2g(\fR \fB\&const int\fR \fI\&IL\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&PROC\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_indxl2g\fR computes the global index of a matrix entry pointed to by the local index IL of the process indicated by PROC. .SH ARGUMENTS .TP 8 IL (input) const int On entry, IL specifies the local index of the matrix entry. IL must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one. .TP 8 PROC (input) const int On entry, PROC specifies the coordinate of the process whose local array row or column is to be determined. PROC must be at least zero and strictly less than NPROCS. .TP 8 SRCPROC (input) const int On entry, SRCPROC specifies the coordinate of the process that possesses the first row or column of the matrix. SRCPROC must be at least zero and strictly less than NPROCS.
.TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2lp \ (3), .BR HPL_indxg2p \ (3), .BR HPL_numroc \ (3), .BR HPL_numrocI \ (3). hpcc-1.4.1/hpl/man/man3/HPL_infog2l.30000644000000000000000000000727711256503657013673 00000000000000.TH HPL_infog2l 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_infog2l \- global to local index translation. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_infog2l(\fR \fB\&int\fR \fI\&I\fR, \fB\&int\fR \fI\&J\fR, \fB\&const int\fR \fI\&IMB\fR, \fB\&const int\fR \fI\&MB\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&RSRC\fR, \fB\&const int\fR \fI\&CSRC\fR, \fB\&const int\fR \fI\&MYROW\fR, \fB\&const int\fR \fI\&MYCOL\fR, \fB\&const int\fR \fI\&NPROW\fR, \fB\&const int\fR \fI\&NPCOL\fR, \fB\&int *\fR \fI\&II\fR, \fB\&int *\fR \fI\&JJ\fR, \fB\&int *\fR \fI\&PROW\fR, \fB\&int *\fR \fI\&PCOL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_infog2l\fR computes the starting local index II, JJ corresponding to the submatrix starting globally at the entry pointed by I, J. This routine returns the coordinates in the grid of the process owning the matrix entry of global indexes I, J, namely PROW and PCOL. .SH ARGUMENTS .TP 8 I (global input) int On entry, I specifies the global row index of the matrix entry. I must be at least zero. .TP 8 J (global input) int On entry, J specifies the global column index of the matrix entry. J must be at least zero. .TP 8 IMB (global input) const int On entry, IMB specifies the size of the first row block of the global matrix. IMB must be at least one. .TP 8 MB (global input) const int On entry, MB specifies the blocking factor used to partition and distribute the rows of the matrix A. MB must be larger than one. 
.TP 8 INB (global input) const int On entry, INB specifies the size of the first column block of the global matrix. INB must be at least one. .TP 8 NB (global input) const int On entry, NB specifies the blocking factor used to partition and distribute the columns of the matrix A. NB must be larger than one. .TP 8 RSRC (global input) const int On entry, RSRC specifies the row coordinate of the process that possesses the row I. RSRC must be at least zero and strictly less than NPROW. .TP 8 CSRC (global input) const int On entry, CSRC specifies the column coordinate of the process that possesses the column J. CSRC must be at least zero and strictly less than NPCOL. .TP 8 MYROW (local input) const int On entry, MYROW specifies my row process coordinate in the grid. MYROW is greater than or equal to zero and less than NPROW. .TP 8 MYCOL (local input) const int On entry, MYCOL specifies my column process coordinate in the grid. MYCOL is greater than or equal to zero and less than NPCOL. .TP 8 NPROW (global input) const int On entry, NPROW specifies the number of process rows in the grid. NPROW is at least one. .TP 8 NPCOL (global input) const int On entry, NPCOL specifies the number of process columns in the grid. NPCOL is at least one. .TP 8 II (local output) int * On exit, II specifies the local starting row index of the submatrix. On exit, II is at least 0. .TP 8 JJ (local output) int * On exit, JJ specifies the local starting column index of the submatrix. On exit, JJ is at least 0. .TP 8 PROW (global output) int * On exit, PROW is the row coordinate of the process owning the entry specified by the global index I. PROW is at least zero and less than NPROW. .TP 8 PCOL (global output) int * On exit, PCOL is the column coordinate of the process owning the entry specified by the global index J. PCOL is at least zero and less than NPCOL. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2p \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numroc \ (3), .BR HPL_numrocI \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_jumpit.30000644000000000000000000000264211256503657013632 00000000000000.TH HPL_jumpit 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_jumpit \- jump into the random sequence. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_jumpit(\fR \fB\&int *\fR \fI\&MULT\fR, \fB\&int *\fR \fI\&IADD\fR, \fB\&int *\fR \fI\&IRANN\fR, \fB\&int *\fR \fI\&IRANM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_jumpit\fR jumps in the random sequence from the number X(n) encoded in IRANN to the number X(m) encoded in IRANM using the constants A and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A and C obviously depend on m and n, see the function HPL_xjumpm in order to initialize them. .SH ARGUMENTS .TP 8 MULT (local input) int * On entry, MULT is an array of dimension 2, that contains the 16-lower and 15-higher bits of the constant A. .TP 8 IADD (local input) int * On entry, IADD is an array of dimension 2, that contains the 16-lower and 15-higher bits of the constant C. .TP 8 IRANN (local input) int * On entry, IRANN is an array of dimension 2, that contains the 16-lower and 15-higher bits of the encoding of X(n). .TP 8 IRANM (local output) int * On entry, IRANM is an array of dimension 2. On exit, this array contains respectively the 16-lower and 15-higher bits of the encoding of X(m). .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_rand \ (3). hpcc-1.4.1/hpl/man/man3/HPL_ladd.30000644000000000000000000000215711256503657013227 00000000000000.TH HPL_ladd 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_ladd \- Adds two long positive integers. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_ladd(\fR \fB\&int *\fR \fI\&J\fR, \fB\&int *\fR \fI\&K\fR, \fB\&int *\fR \fI\&I\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_ladd\fR adds without carry two long positive integers K and J and puts the result into I. 
The long integers I, J, K are encoded on 64 bits using an array of 2 integers. The 32-lower bits are stored in the first entry of each array, the 32-higher bits in the second entry. .SH ARGUMENTS .TP 8 J (local input) int * On entry, J is an integer array of dimension 2 containing the encoded long integer J. .TP 8 K (local input) int * On entry, K is an integer array of dimension 2 containing the encoded long integer K. .TP 8 I (local output) int * On entry, I is an integer array of dimension 2. On exit, this array contains the encoded long integer result. .SH SEE ALSO .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3), .BR HPL_rand \ (3). hpcc-1.4.1/hpl/man/man3/HPL_lmul.30000644000000000000000000000230111256503657013263 00000000000000.TH HPL_lmul 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_lmul \- multiplies 2 long positive integers. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_lmul(\fR \fB\&int *\fR \fI\&K\fR, \fB\&int *\fR \fI\&J\fR, \fB\&int *\fR \fI\&I\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_lmul\fR multiplies without carry two long positive integers K and J and puts the result into I. The long integers I, J, K are encoded on 64 bits using an array of 2 integers. The 32-lower bits are stored in the first entry of each array, the 32-higher bits in the second entry of each array. For efficiency purposes, the intrinsic modulo function is inlined. .SH ARGUMENTS .TP 8 K (local input) int * On entry, K is an integer array of dimension 2 containing the encoded long integer K. .TP 8 J (local input) int * On entry, J is an integer array of dimension 2 containing the encoded long integer J. .TP 8 I (local output) int * On entry, I is an integer array of dimension 2. On exit, this array contains the encoded long integer result. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3), .BR HPL_rand \ (3).
hpcc-1.4.1/hpl/man/man3/HPL_logsort.30000644000000000000000000000450611256503657014014 00000000000000.TH HPL_logsort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_logsort \- Sort the processes in logarithmic order. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_logsort(\fR \fB\&const int\fR \fI\&NPROCS\fR, \fB\&const int\fR \fI\&ICURROC\fR, \fB\&int *\fR \fI\&IPLEN\fR, \fB\&int *\fR \fI\&IPMAP\fR, \fB\&int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_logsort\fR computes an array IPMAP and its inverse IPMAPM1 that contain the logarithmic sorted processes id with respect to the local number of rows of U that they own. This is necessary to ensure that the logarithmic spreading of U is optimal in terms of number of steps and communication volume as well. In other words, the largest pieces of U will be sent a minimal number of times. .SH ARGUMENTS .TP 8 NPROCS (global input) const int On entry, NPROCS specifies the number of process rows in the process grid. NPROCS is at least one. .TP 8 ICURROC (global input) const int On entry, ICURROC is the source process row. .TP 8 IPLEN (global input/output) int * On entry, IPLEN is an array of dimension NPROCS+1, such that IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, that process i-1 has. On exit, IPLEN[i] is the number of rows of U in the processes before process IPMAP[i] after the sort, with the convention that IPLEN[NPROCS] is the total number of rows of the panel. In other words, IPLEN[i+1] - IPLEN[i] is the number of rows of A that should be moved to the process IPMAP[i]. IPLEN is such that the number of rows of the source process row is IPLEN[1] - IPLEN[0], and the remaining entries of this array are sorted so that the quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. .TP 8 IPMAP (global output) int * On entry, IPMAP is an array of dimension NPROCS. On exit, array contains the logarithmic mapping of the processes.
In other words, IPMAP[myroc] is the corresponding sorted process coordinate. .TP 8 IPMAPM1 (global output) int * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, this array contains the inverse of the logarithmic mapping contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in [0.. NPROCS) .SH SEE ALSO .BR HPL_plindx1 \ (3), .BR HPL_plindx10 \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_max.30000644000000000000000000000212611256503657013104 00000000000000.TH HPL_max 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_max \- Combine (max) two buffers. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_max(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const void *\fR \fI\&IN\fR, \fB\&void *\fR \fI\&INOUT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_max\fR combines (max) two buffers. .SH ARGUMENTS .TP 8 N (input) const int On entry, N specifies the length of the buffers to be combined. N must be at least zero. .TP 8 IN (input) const void * On entry, IN points to the input-only buffer to be combined. .TP 8 INOUT (input/output) void * On entry, INOUT points to the input-output buffer to be combined. On exit, the entries of this array contain the combined results. .TP 8 DTYPE (input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffer operands. .SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_reduce \ (3), .BR HPL_all_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_min \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_min.30000644000000000000000000000212611256503657013102 00000000000000.TH HPL_min 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_min \- Combine (min) two buffers.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_min(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const void *\fR \fI\&IN\fR, \fB\&void *\fR \fI\&INOUT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_min\fR combines (min) two buffers. .SH ARGUMENTS .TP 8 N (input) const int On entry, N specifies the length of the buffers to be combined. N must be at least zero. .TP 8 IN (input) const void * On entry, IN points to the input-only buffer to be combined. .TP 8 INOUT (input/output) void * On entry, INOUT points to the input-output buffer to be combined. On exit, the entries of this array contain the combined results. .TP 8 DTYPE (input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffer operands. .SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_reduce \ (3), .BR HPL_all_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_numroc.30000644000000000000000000000340311256503657013621 00000000000000.TH HPL_numroc 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_numroc \- Compute the local number of row/columns. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_numroc(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&PROC\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_numroc\fR returns the local number of matrix rows/columns process PROC will get if we give out N rows/columns starting from global index 0. .SH ARGUMENTS .TP 8 N (input) const int On entry, N specifies the number of rows/columns being dealt out. N must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one.
.TP 8 PROC (input) const int On entry, PROC specifies the coordinate of the process whose local portion is determined. PROC must be at least zero and strictly less than NPROCS. .TP 8 SRCPROC (input) const int On entry, SRCPROC specifies the coordinate of the process that possesses the first row or column of the matrix. SRCPROC must be at least zero and strictly less than NPROCS. .TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2lp \ (3), .BR HPL_indxg2p \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numrocI \ (3). hpcc-1.4.1/hpl/man/man3/HPL_numrocI.30000644000000000000000000000364211256503657013737 00000000000000.TH HPL_numrocI 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_numrocI \- Compute the local number of row/columns. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_numrocI(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&I\fR, \fB\&const int\fR \fI\&INB\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const int\fR \fI\&PROC\fR, \fB\&const int\fR \fI\&SRCPROC\fR, \fB\&const int\fR \fI\&NPROCS\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_numrocI\fR returns the local number of matrix rows/columns process PROC will get if we give out N rows/columns starting from global index I. .SH ARGUMENTS .TP 8 N (input) const int On entry, N specifies the number of rows/columns being dealt out. N must be at least zero. .TP 8 I (input) const int On entry, I specifies the global index of the matrix entry. I must be at least zero. .TP 8 INB (input) const int On entry, INB specifies the size of the first block of the global matrix. INB must be at least one. .TP 8 NB (input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one.
.TP 8 PROC (input) const int On entry, PROC specifies the coordinate of the process whose local portion is determined. PROC must be at least zero and strictly less than NPROCS. .TP 8 SRCPROC (input) const int On entry, SRCPROC specifies the coordinate of the process that possesses the first row or column of the matrix. SRCPROC must be at least zero and strictly less than NPROCS. .TP 8 NPROCS (input) const int On entry, NPROCS specifies the total number of process rows or columns over which the matrix is distributed. NPROCS must be at least one. .SH SEE ALSO .BR HPL_indxg2l \ (3), .BR HPL_indxg2lp \ (3), .BR HPL_indxg2p \ (3), .BR HPL_indxl2g \ (3), .BR HPL_numroc \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pabort.30000644000000000000000000000204711256503657013610 00000000000000.TH HPL_pabort 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pabort \- halts execution. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pabort(\fR \fB\&int\fR \fI\&LINE\fR, \fB\&const char *\fR \fI\&SRNAME\fR, \fB\&const char *\fR \fI\&FORM\fR, \fB\&...\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pabort\fR displays an error message on stderr and halts execution. .SH ARGUMENTS .TP 8 LINE (local input) int On entry, LINE specifies the line number in the file where the error has occurred. When LINE is not a positive line number, it is ignored. .TP 8 SRNAME (local input) const char * On entry, SRNAME should be the name of the routine calling this error handler. .TP 8 FORM (local input) const char * On entry, FORM specifies the format, i.e., how the subsequent arguments are converted for output. .TP 8 (local input) ... On entry, ... is the list of arguments to be printed within the format string. .SH SEE ALSO .BR HPL_fprintf \ (3), .BR HPL_pwarn \ (3).
hpcc-1.4.1/hpl/man/man3/HPL_packL.30000644000000000000000000000214311256503657013350 00000000000000.TH HPL_packL 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_packL \- Form the MPI structure for the row ring broadcasts. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_packL(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&INDEX\fR, \fB\&const int\fR \fI\&LEN\fR, \fB\&const int\fR \fI\&IBUF\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_packL\fR forms the MPI data type for the panel to be broadcast. Successful completion is indicated by the returned error code MPI_SUCCESS. .SH ARGUMENTS .TP 8 PANEL (input/output) HPL_T_panel * On entry, PANEL points to the current panel data structure being broadcast. .TP 8 INDEX (input) const int On entry, INDEX points to the first entry of the packed buffer being broadcast. .TP 8 LEN (input) const int On entry, LEN is the length of the packed buffer. .TP 8 IBUF (input) const int On entry, IBUF specifies the panel buffer/count/type entries that should be initialized. .SH SEE ALSO .BR HPL_binit \ (3), .BR HPL_bcast \ (3), .BR HPL_bwait \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pddriver.30000644000000000000000000000060111256503657014132 00000000000000.TH main 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME main \- HPL main timing program. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&main();\fR .SH DESCRIPTION \fB\&main\fR is the main driver program for testing the HPL routines. This program is driven by a short data file named "HPL.dat". .SH SEE ALSO .BR HPL_pdinfo \ (3), .BR HPL_pdtest \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdfact.30000644000000000000000000000473011256503657013563 00000000000000.TH HPL_pdfact 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdfact \- recursive panel factorization. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdfact(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdfact\fR recursively factorizes a 1-dimensional panel of columns. The RPFACT function pointer specifies the recursive algorithm to be used, either Crout, Left- or Right looking. NBMIN allows to vary the recursive stopping criterion in terms of the number of columns in the panel, and NDIV allows to specify the number of subpanels each panel should be divided into. Usually a value of 2 will be chosen. Finally PFACT is a function pointer specifying the non-recursive algorithm to be used on at most NBMIN columns. One can also choose here between Crout, Left- or Right looking. Empirical tests seem to indicate that values of 4 or 8 for NBMIN give the best results. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information.
.SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdgesv.30000644000000000000000000000221411256503657013605 00000000000000.TH HPL_pdgesv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdgesv \- Solve A x = b. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdgesv(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdgesv\fR factors a N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with or without look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information. .SH SEE ALSO .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdtrsv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdgesv0.30000644000000000000000000000246311256503657013673 00000000000000.TH HPL_pdgesv0 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdgesv0 \- Factor an N x N+1 matrix.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdgesv0(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdgesv0\fR factors a N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant without look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdfact \ (3), .BR HPL_binit \ (3), .BR HPL_bcast \ (3), .BR HPL_bwait \ (3), .BR HPL_pdupdateNN \ (3), .BR HPL_pdupdateNT \ (3), .BR HPL_pdupdateTN \ (3), .BR HPL_pdupdateTT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdgesvK1.30000644000000000000000000000243711256503657014010 00000000000000.TH HPL_pdgesvK1 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdgesvK1 \- Factor an N x N+1 matrix. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdgesvK1(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdgesvK1\fR factors a N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix.
.SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdfact \ (3), .BR HPL_binit \ (3), .BR HPL_bcast \ (3), .BR HPL_bwait \ (3), .BR HPL_pdupdateNN \ (3), .BR HPL_pdupdateNT \ (3), .BR HPL_pdupdateTN \ (3), .BR HPL_pdupdateTT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdgesvK2.30000644000000000000000000000246611256503657014013 00000000000000.TH HPL_pdgesvK2 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdgesvK2 \- Factor an N x N+1 matrix. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdgesvK2(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdgesvK2\fR factors a N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information.
.SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdfact \ (3), .BR HPL_binit \ (3), .BR HPL_bcast \ (3), .BR HPL_bwait \ (3), .BR HPL_pdupdateNN \ (3), .BR HPL_pdupdateNT \ (3), .BR HPL_pdupdateTN \ (3), .BR HPL_pdupdateTT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdinfo.30000644000000000000000000001576411256503657013612 00000000000000.TH HPL_pdinfo 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdinfo \- Read input parameter file. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdinfo(\fR \fB\&HPL_T_test *\fR \fI\&TEST\fR, \fB\&int *\fR \fI\&NS\fR, \fB\&int *\fR \fI\&N\fR, \fB\&int *\fR \fI\&NBS\fR, \fB\&int *\fR \fI\&NB\fR, \fB\&HPL_T_ORDER *\fR \fI\&PMAPPIN\fR, \fB\&int *\fR \fI\&NPQS\fR, \fB\&int *\fR \fI\&P\fR, \fB\&int *\fR \fI\&Q\fR, \fB\&int *\fR \fI\&NPFS\fR, \fB\&HPL_T_FACT *\fR \fI\&PF\fR, \fB\&int *\fR \fI\&NBMS\fR, \fB\&int *\fR \fI\&NBM\fR, \fB\&int *\fR \fI\&NDVS\fR, \fB\&int *\fR \fI\&NDV\fR, \fB\&int *\fR \fI\&NRFS\fR, \fB\&HPL_T_FACT *\fR \fI\&RF\fR, \fB\&int *\fR \fI\&NTPS\fR, \fB\&HPL_T_TOP *\fR \fI\&TP\fR, \fB\&int *\fR \fI\&NDHS\fR, \fB\&int *\fR \fI\&DH\fR, \fB\&HPL_T_SWAP *\fR \fI\&FSWAP\fR, \fB\&int *\fR \fI\&TSWAP\fR, \fB\&int *\fR \fI\&L1NOTRAN\fR, \fB\&int *\fR \fI\&UNOTRAN\fR, \fB\&int *\fR \fI\&EQUIL\fR, \fB\&int *\fR \fI\&ALIGN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdinfo\fR reads the startup information for the various tests and transmits it to all processes. .SH ARGUMENTS .TP 8 TEST (global output) HPL_T_test * On entry, TEST points to a testing data structure. On exit, the fields of this data structure are initialized as follows: TEST->outfp specifies the output file where the results will be printed. It is only defined and used by the process 0 of the grid. TEST->thrsh specifies the threshold value for the test ratio. TEST->epsil is the relative machine precision of the distributed computer.
Finally the test counters, kfail, kpass, kskip, ktest are initialized to zero. .TP 8 NS (global output) int * On exit, NS specifies the number of different problem sizes to be tested. NS is less than or equal to HPL_MAX_PARAM. .TP 8 N (global output) int * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, the first NS entries of this array contain the problem sizes to run the code with. .TP 8 NBS (global output) int * On exit, NBS specifies the number of different distribution blocking factors to be tested. NBS must be less than or equal to HPL_MAX_PARAM. .TP 8 NB (global output) int * On entry, NB is an array of dimension HPL_MAX_PARAM. On exit, the first NBS entries of this array contain the values of the various distribution blocking factors, to run the code with. .TP 8 PMAPPIN (global output) HPL_T_ORDER * On exit, PMAPPIN specifies the process mapping onto the nodes of the MPI machine configuration. PMAPPIN defaults to row-major ordering. .TP 8 NPQS (global output) int * On exit, NPQS specifies the number of different values that can be used for P and Q, i.e., the number of process grids to run the code with. NPQS must be less than or equal to HPL_MAX_PARAM. .TP 8 P (global output) int * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, the first NPQS entries of this array contain the values of P, the number of process rows of the NPQS grids to run the code with. .TP 8 Q (global output) int * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, the first NPQS entries of this array contain the values of Q, the number of process columns of the NPQS grids to run the code with. .TP 8 NPFS (global output) int * On exit, NPFS specifies the number of different values that can be used for PF : the panel factorization algorithm to run the code with. NPFS is less than or equal to HPL_MAX_PARAM. .TP 8 PF (global output) HPL_T_FACT * On entry, PF is an array of dimension HPL_MAX_PARAM.
On exit, the first NPFS entries of this array contain the various panel factorization algorithms to run the code with. .TP 8 NBMS (global output) int * On exit, NBMS specifies the number of various recursive stopping criteria to be tested. NBMS must be less than or equal to HPL_MAX_PARAM. .TP 8 NBM (global output) int * On entry, NBM is an array of dimension HPL_MAX_PARAM. On exit, the first NBMS entries of this array contain the values of the various recursive stopping criteria to be tested. .TP 8 NDVS (global output) int * On exit, NDVS specifies the number of various numbers of panels in recursion to be tested. NDVS is less than or equal to HPL_MAX_PARAM. .TP 8 NDV (global output) int * On entry, NDV is an array of dimension HPL_MAX_PARAM. On exit, the first NDVS entries of this array contain the values of the various numbers of panels in recursion to be tested. .TP 8 NRFS (global output) int * On exit, NRFS specifies the number of different values that can be used for RF : the recursive factorization algorithm to be tested. NRFS is less than or equal to HPL_MAX_PARAM. .TP 8 RF (global output) HPL_T_FACT * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, the first NRFS entries of this array contain the various recursive factorization algorithms to run the code with. .TP 8 NTPS (global output) int * On exit, NTPS specifies the number of different values that can be used for the broadcast topologies to be tested. NTPS is less than or equal to HPL_MAX_PARAM. .TP 8 TP (global output) HPL_T_TOP * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, the first NTPS entries of this array contain the various broadcast (along rows) topologies to run the code with. .TP 8 NDHS (global output) int * On exit, NDHS specifies the number of different values that can be used for the lookahead depths to be tested. NDHS is less than or equal to HPL_MAX_PARAM. .TP 8 DH (global output) int * On entry, DH is an array of dimension HPL_MAX_PARAM. 
On exit, the first NDHS entries of this array contain the values of lookahead depths to run the code with. Such a value is either 0 (no lookahead) or greater than zero. .TP 8 FSWAP (global output) HPL_T_SWAP * On exit, FSWAP specifies the swapping algorithm to be used in all tests. .TP 8 TSWAP (global output) int * On exit, TSWAP specifies the swapping threshold as a number of columns when the mixed swapping algorithm was chosen. .TP 8 L1NOTRAN (global output) int * On exit, L1NOTRAN specifies whether the upper triangle of the panels of columns should be stored in no-transposed form (L1NOTRAN=1) or in transposed form (L1NOTRAN=0). .TP 8 UNOTRAN (global output) int * On exit, UNOTRAN specifies whether the panels of rows should be stored in no-transposed form (UNOTRAN=1) or transposed form (UNOTRAN=0) during their broadcast. .TP 8 EQUIL (global output) int * On exit, EQUIL specifies whether equilibration during the swap-broadcast of the panel of rows should be performed (EQUIL=1) or not (EQUIL=0). .TP 8 ALIGN (global output) int * On exit, ALIGN specifies the alignment of the dynamically allocated buffers in double precision words. ALIGN is greater than zero. .SH SEE ALSO .BR HPL_pddriver \ (3), .BR HPL_pdtest \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdlamch.30000644000000000000000000000454111256503657013732 00000000000000.TH HPL_pdlamch 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlamch \- determines machine-specific arithmetic constants.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_pdlamch(\fR \fB\&MPI_Comm\fR \fI\&COMM\fR, \fB\&const HPL_T_MACH\fR \fI\&CMACH\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlamch\fR determines machine-specific arithmetic constants such as the relative machine precision (eps), the safe minimum(sfmin) such that 1/sfmin does not overflow, the base of the machine (base), the precision (prec), the number of (base) digits in the mantissa (t), whether rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum exponent before (gradual) underflow (emin), the underflow threshold (rmin)- base**(emin-1), the largest exponent before overflow (emax), the overflow threshold (rmax) - (base**emax)*(1-eps). .SH ARGUMENTS .TP 8 COMM (global/local input) MPI_Comm The MPI communicator identifying the process collection. .TP 8 CMACH (global input) const HPL_T_MACH Specifies the value to be returned by HPL_pdlamch = HPL_MACH_EPS, HPL_pdlamch := eps (default) = HPL_MACH_SFMIN, HPL_pdlamch := sfmin = HPL_MACH_BASE, HPL_pdlamch := base = HPL_MACH_PREC, HPL_pdlamch := eps*base = HPL_MACH_MLEN, HPL_pdlamch := t = HPL_MACH_RND, HPL_pdlamch := rnd = HPL_MACH_EMIN, HPL_pdlamch := emin = HPL_MACH_RMIN, HPL_pdlamch := rmin = HPL_MACH_EMAX, HPL_pdlamch := emax = HPL_MACH_RMAX, HPL_pdlamch := rmax where eps = relative machine precision, sfmin = safe minimum, base = base of the machine, prec = eps*base, t = number of digits in the mantissa, rnd = 1.0 if rounding occurs in addition, emin = minimum exponent before underflow, rmin = underflow threshold, emax = largest exponent before overflow, rmax = overflow threshold. hpcc-1.4.1/hpl/man/man3/HPL_pdlange.30000644000000000000000000000410611256503657013731 00000000000000.TH HPL_pdlange 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlange \- Compute ||A||. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_pdlange(\fR \fB\&const HPL_T_grid *\fR \fI\&GRID\fR, \fB\&const HPL_T_NORM\fR \fI\&NORM\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&const double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlange\fR returns the value of the one norm, or the infinity norm, or the element of largest absolute value of a distributed matrix A: max(abs(A(i,j))) when NORM = HPL_NORM_A, norm1(A), when NORM = HPL_NORM_1, normI(A), when NORM = HPL_NORM_I, where norm1 denotes the one norm of a matrix (maximum column sum) and normI denotes the infinity norm of a matrix (maximum row sum). Note that max(abs(A(i,j))) is not a matrix norm. .SH ARGUMENTS .TP 8 GRID (local input) const HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 NORM (global input) const HPL_T_NORM On entry, NORM specifies the value to be returned by this function as described above. .TP 8 M (global input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (global input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 NB (global input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix. NB must be larger than one. .TP 8 A (local input) const double * On entry, A points to an array of dimension (LDA,LocQ(N)), that contains the local pieces of the distributed matrix A. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,LocP(M)). .SH SEE ALSO .BR HPL_pdlaprnt \ (3), .BR HPL_fprintf \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_pdlaprnt.30000644000000000000000000000421111256503657014140 00000000000000.TH HPL_pdlaprnt 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlaprnt \- Print a distributed matrix A. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdlaprnt(\fR \fB\&const HPL_T_grid *\fR \fI\&GRID\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const int\fR \fI\&IAROW\fR, \fB\&const int\fR \fI\&IACOL\fR, \fB\&const char *\fR \fI\&CMATNM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlaprnt\fR prints to standard error a distributed matrix A. The local pieces of A are sent to the process of coordinates (0,0) in the grid and then printed. .SH ARGUMENTS .TP 8 GRID (local input) const HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 M (global input) const int On entry, M specifies the number of rows of the coefficient matrix A. M must be at least zero. .TP 8 N (global input) const int On entry, N specifies the number of columns of the coefficient matrix A. N must be at least zero. .TP 8 NB (global input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix. NB must be larger than one. .TP 8 A (local input) double * On entry, A points to an array of dimension (LDA,LocQ(N)). This array contains the coefficient matrix to be printed. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,LocP(M)). .TP 8 IAROW (global input) const int On entry, IAROW specifies the row process coordinate owning the first row of A. IAROW must be larger than or equal to zero and less than NPROW. .TP 8 IACOL (global input) const int On entry, IACOL specifies the column process coordinate owning the first column of A. IACOL must be larger than or equal to zero and less than NPCOL. 
.TP 8 CMATNM (global input) const char * On entry, CMATNM is the name of the matrix to be printed. .SH SEE ALSO .BR HPL_fprintf \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdlaswp00N.30000644000000000000000000000420011256503657014242 00000000000000.TH HPL_pdlaswp00N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlaswp00N \- Broadcast a column panel L and swap the row panel U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdlaswp00N(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlaswp00N\fR applies the NB row interchanges to NN columns of the trailing submatrix and broadcast a column panel. Bi-directional exchange is used to perform the swap :: broadcast of the row panel U at once, resulting in a lower number of messages than usual as well as a lower communication volume. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: log_2(P) * (lat + NB*LocQ(N) / bdwth) where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. Mono directional links will double this communication cost. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be broadcast and swapped) information. 
.TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be swapped and broadcast starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdupdateNN \ (3), .BR HPL_pdupdateTN \ (3), .BR HPL_pipid \ (3), .BR HPL_plindx0 \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03N \ (3), .BR HPL_dlaswp04N \ (3), .BR HPL_dlaswp05N \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdlaswp00T.30000644000000000000000000000420011256503657014250 00000000000000.TH HPL_pdlaswp00T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlaswp00T \- Broadcast a column panel L and swap the row panel U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdlaswp00T(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlaswp00T\fR applies the NB row interchanges to NN columns of the trailing submatrix and broadcast a column panel. Bi-directional exchange is used to perform the swap :: broadcast of the row panel U at once, resulting in a lower number of messages than usual as well as a lower communication volume. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: log_2(P) * (lat + NB*LocQ(N) / bdwth) where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. Mono directional links will double this communication cost. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. 
If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be broadcast and swapped) information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be swapped and broadcast starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdupdateNT \ (3), .BR HPL_pdupdateTT \ (3), .BR HPL_pipid \ (3), .BR HPL_plindx0 \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp02N \ (3), .BR HPL_dlaswp03T \ (3), .BR HPL_dlaswp04T \ (3), .BR HPL_dlaswp05T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdlaswp01N.30000644000000000000000000000444111256503657014252 00000000000000.TH HPL_pdlaswp01N 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlaswp01N \- Broadcast a column panel L and swap the row panel U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdlaswp01N(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlaswp01N\fR applies the NB row interchanges to NN columns of the trailing submatrix and broadcast a column panel. A "Spread then roll" algorithm performs the swap :: broadcast of the row panel U at once, resulting in a minimal communication volume and a "very good" use of the connectivity if available. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. 
K is a constant in (2,3] that depends on the achieved bandwidth during a simultaneous message exchange between two processes. An empirical optimistic value of K is typically 2.4. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be swapped and broadcast starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdupdateNN \ (3), .BR HPL_pdupdateTN \ (3), .BR HPL_pipid \ (3), .BR HPL_plindx1 \ (3), .BR HPL_plindx10 \ (3), .BR HPL_spreadN \ (3), .BR HPL_equil \ (3), .BR HPL_rollN \ (3), .BR HPL_dlaswp00N \ (3), .BR HPL_dlaswp01N \ (3), .BR HPL_dlaswp06N \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdlaswp01T.30000644000000000000000000000444111256503657014260 00000000000000.TH HPL_pdlaswp01T 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdlaswp01T \- Broadcast a column panel L and swap the row panel U. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdlaswp01T(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdlaswp01T\fR applies the NB row interchanges to NN columns of the trailing submatrix and broadcast a column panel. A "Spread then roll" algorithm performs the swap :: broadcast of the row panel U at once, resulting in a minimal communication volume and a "very good" use of the connectivity if available. 
With P process rows and assuming bi-directional links, the running time of this function can be approximated by: (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. K is a constant in (2,3] that depends on the achieved bandwidth during a simultaneous message exchange between two processes. An empirical optimistic value of K is typically 2.4. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be swapped and broadcast starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdupdateNT \ (3), .BR HPL_pdupdateTT \ (3), .BR HPL_pipid \ (3), .BR HPL_plindx1 \ (3), .BR HPL_plindx10 \ (3), .BR HPL_spreadT \ (3), .BR HPL_equil \ (3), .BR HPL_rollT \ (3), .BR HPL_dlaswp10N \ (3), .BR HPL_dlaswp01T \ (3), .BR HPL_dlaswp06T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdmatgen.30000644000000000000000000000360711256503657014123 00000000000000.TH HPL_pdmatgen 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdmatgen \- Parallel random matrix generator. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdmatgen(\fR \fB\&const HPL_T_grid *\fR \fI\&GRID\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&NB\fR, \fB\&double *\fR \fI\&A\fR, \fB\&const int\fR \fI\&LDA\fR, \fB\&const int\fR \fI\&ISEED\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdmatgen\fR generates (or regenerates) a parallel random matrix A. The pseudo-random generator uses the linear congruential algorithm: X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer Programming, Knuth 1973, Vol. 2. .SH ARGUMENTS .TP 8 GRID (local input) const HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 M (global input) const int On entry, M specifies the number of rows of the matrix A. M must be at least zero. .TP 8 N (global input) const int On entry, N specifies the number of columns of the matrix A. N must be at least zero. .TP 8 NB (global input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one. .TP 8 A (local output) double * On entry, A points to an array of dimension (LDA,LocQ(N)). On exit, this array contains the coefficients of the randomly generated matrix. .TP 8 LDA (local input) const int On entry, LDA specifies the leading dimension of the array A. LDA must be at least max(1,LocP(M)). .TP 8 ISEED (global input) const int On entry, ISEED specifies the seed number to generate the matrix A. ISEED must be at least zero. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3), .BR HPL_drand \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdmxswp.30000644000000000000000000000474611256503657014033 00000000000000.TH HPL_pdmxswp 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdmxswp \- swaps and broadcasts the pivot row.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdmxswp(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&II\fR, \fB\&const int\fR \fI\&JJ\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdmxswp\fR swaps and broadcasts the absolute value max row using bi-directional exchange. The buffer is partially set by HPL_dlocmax. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) where lat and bdwth are the latency and bandwidth of the network for double precision real elements. Communication only occurs in one process column. Mono-directional links will cause the communication cost to double. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of the matrix column on which this function operates. .TP 8 II (local input) const int On entry, II specifies the row offset where the column to be operated on starts with respect to the panel. .TP 8 JJ (local input) const int On entry, JJ specifies the column offset where the column to be operated on starts with respect to the panel. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2 * (4+2*N0). It is assumed that HPL_dlocmax was called prior to this routine to initialize the first four entries of this array. On exit, the N0 length max row is stored in WORK[4:4+N0-1]; Note that this is also the JJth row (or column) of L1. The remaining part is used as a temporary array. 
.SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpancrN.30000644000000000000000000000576711256503657014102 00000000000000.TH HPL_pdpancrN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpancrN \- Crout panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpancrN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpancrN\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. 
On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpancrT.30000644000000000000000000000571311256503657014077 00000000000000.TH HPL_pdpancrT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpancrT \- Crout panel factorization.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpancrT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpancrT\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J.
Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanel_disp.30000644000000000000000000000125011256503657014756 00000000000000.TH HPL_pdpanel_disp 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanel_disp \- Deallocate a panel data structure. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_pdpanel_disp(\fR \fB\&HPL_T_panel * *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanel_disp\fR deallocates the panel structure and resources and stores the error code returned by the panel factorization. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the address of the panel data structure to be deallocated. .SH SEE ALSO .BR HPL_pdpanel_new \ (3), .BR HPL_pdpanel_init \ (3), .BR HPL_pdpanel_free \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanel_free.30000644000000000000000000000123711256503657014745 00000000000000.TH HPL_pdpanel_free 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanel_free \- Deallocate the panel resources.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_pdpanel_free(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanel_free\fR deallocates the panel resources and stores the error code returned by the panel factorization. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the panel data structure from which the resources should be deallocated. .SH SEE ALSO .BR HPL_pdpanel_new \ (3), .BR HPL_pdpanel_init \ (3), .BR HPL_pdpanel_disp \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanel_init.30000644000000000000000000000411511256503657014765 00000000000000.TH HPL_pdpanel_init 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanel_init \- Initialize the panel resources. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanel_init(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&JB\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR, \fB\&const int\fR \fI\&IA\fR, \fB\&const int\fR \fI\&JA\fR, \fB\&const int\fR \fI\&TAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanel_init\fR initializes a panel data structure. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 M (local input) const int On entry, M specifies the global number of rows of the panel. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the global number of columns of the panel and trailing submatrix. N must be at least zero. .TP 8 JB (global input) const int On entry, JB specifies the number of columns of the panel. JB must be at least zero.
.TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information. .TP 8 IA (global input) const int On entry, IA is the global row index identifying the panel and trailing submatrix. IA must be at least zero. .TP 8 JA (global input) const int On entry, JA is the global column index identifying the panel and trailing submatrix. JA must be at least zero. .TP 8 TAG (global input) const int On entry, TAG is the row broadcast message id. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .SH SEE ALSO .BR HPL_pdpanel_new \ (3), .BR HPL_pdpanel_disp \ (3), .BR HPL_pdpanel_free \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanel_new.30000644000000000000000000000415111256503657014613 00000000000000.TH HPL_pdpanel_new 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanel_new \- Create a panel data structure. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanel_new(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&JB\fR, \fB\&HPL_T_pmat *\fR \fI\&A\fR, \fB\&const int\fR \fI\&IA\fR, \fB\&const int\fR \fI\&JA\fR, \fB\&const int\fR \fI\&TAG\fR, \fB\&HPL_T_panel * *\fR \fI\&PANEL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanel_new\fR creates and initializes a panel data structure. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters. .TP 8 M (local input) const int On entry, M specifies the global number of rows of the panel. M must be at least zero. .TP 8 N (local input) const int On entry, N specifies the global number of columns of the panel and trailing submatrix. N must be at least zero. 
.TP 8 JB (global input) const int On entry, JB specifies the number of columns of the panel. JB must be at least zero. .TP 8 A (local input/output) HPL_T_pmat * On entry, A points to the data structure containing the local array information. .TP 8 IA (global input) const int On entry, IA is the global row index identifying the panel and trailing submatrix. IA must be at least zero. .TP 8 JA (global input) const int On entry, JA is the global column index identifying the panel and trailing submatrix. JA must be at least zero. .TP 8 TAG (global input) const int On entry, TAG is the row broadcast message id. .TP 8 PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the address of the panel data structure to create and initialize. .SH SEE ALSO .BR HPL_pdpanel_init \ (3), .BR HPL_pdpanel_disp \ (3), .BR HPL_pdpanel_free \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanllN.30000644000000000000000000000600411256503657014066 00000000000000.TH HPL_pdpanllN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanllN \- Left-looking panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanllN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanllN\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Left-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual.
On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0).
.SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanllT.30000644000000000000000000000573011256503657014101 00000000000000.TH HPL_pdpanllT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanllT \- Left-looking panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanllT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanllT\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Left-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled.
The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanrlN.30000644000000000000000000000600611256503657014076 00000000000000.TH HPL_pdpanrlN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanrlN \- Right-looking panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanrlN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanrlN\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Right-looking variant of the usual one-dimensional algorithm. 
The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A).
.TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlT \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdpanrlT.30000644000000000000000000000573211256503657014111 00000000000000.TH HPL_pdpanrlT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdpanrlT \- Right-looking panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdpanrlT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdpanrlT\fR factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Right-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. 
On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdrpancrN.30000644000000000000000000000521211256503657014245 00000000000000.TH HPL_pdrpancrN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpancrN \- Crout recursive panel factorization.
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpancrN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpancrN\fR recursively factorizes a panel of columns using the recursive Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A.
.TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdrpancrT.30000644000000000000000000000512011256503657014251 00000000000000.TH HPL_pdrpancrT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpancrT \- Crout recursive panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpancrT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpancrT\fR recursively factorizes a panel of columns using the recursive Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. 
On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdrpanllN.30000644000000000000000000000520311256503657014250 00000000000000.TH HPL_pdrpanllN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpanllN \- Left-looking recursive panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpanllN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpanllN\fR recursively factorizes a panel of columns using the recursive Left-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). 
Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_pdrpanllT.30000644000000000000000000000512711256503657014263 00000000000000.TH HPL_pdrpanllT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpanllT \- Left-looking recursive panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpanllT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpanllT\fR recursively factorizes a panel of columns using the recursive Left-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). 
.TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdrpanrlN.30000644000000000000000000000520411256503657014257 00000000000000.TH HPL_pdrpanrlN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpanrlN \- Right-looking recursive panel factorization. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpanrlN(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpanrlN\fR recursively factorizes a panel of columns using the recursive Right-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. 
On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). .SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlT \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdrpanrlT.30000644000000000000000000000513011256503657014263 00000000000000.TH HPL_pdrpanrlT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdrpanrlT \- Right-looking recursive panel factorization. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdrpanrlT(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&M\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&ICOFF\fR, \fB\&double *\fR \fI\&WORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdrpanrlT\fR recursively factorizes a panel of columns using the recursive Right-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 M (local input) const int On entry, M specifies the local number of rows of sub(A). .TP 8 N (local input) const int On entry, N specifies the local number of columns of sub(A). .TP 8 ICOFF (global input) const int On entry, ICOFF specifies the row and column offset of sub(A) in A. .TP 8 WORK (local workspace) double * On entry, WORK is a workarray of size at least 2*(4+2*N0). 
.SH SEE ALSO .BR HPL_dlocmax \ (3), .BR HPL_dlocswpN \ (3), .BR HPL_dlocswpT \ (3), .BR HPL_pdmxswp \ (3), .BR HPL_pdpancrN \ (3), .BR HPL_pdpancrT \ (3), .BR HPL_pdpanllN \ (3), .BR HPL_pdpanllT \ (3), .BR HPL_pdpanrlN \ (3), .BR HPL_pdpanrlT \ (3), .BR HPL_pdrpancrN \ (3), .BR HPL_pdrpancrT \ (3), .BR HPL_pdrpanllN \ (3), .BR HPL_pdrpanllT \ (3), .BR HPL_pdrpanrlN \ (3), .BR HPL_pdfact \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdtest.30000644000000000000000000000427611256503657013632 00000000000000.TH HPL_pdtest 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdtest \- Perform one test. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdtest(\fR \fB\&HPL_T_test *\fR \fI\&TEST\fR, \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_palg *\fR \fI\&ALGO\fR, \fB\&const int\fR \fI\&N\fR, \fB\&const int\fR \fI\&NB\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdtest\fR performs one test given a set of parameters such as the process grid, the problem size, the distribution blocking factor ... This function generates the data, calls and times the linear system solver, checks the accuracy of the obtained vector solution and writes this information to the file pointed to by TEST->outfp. .SH ARGUMENTS .TP 8 TEST (global input) HPL_T_test * On entry, TEST points to a testing data structure: outfp specifies the output file where the results will be printed. It is only defined and used by the process 0 of the grid. thrsh specifies the threshold value for the test ratio. Concretely, a test is declared "PASSED" if and only if the following inequality is satisfied: ||Ax-b||_oo / ( epsil * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N ) < thrsh. epsil is the relative machine precision of the distributed computer. Finally the test counters, kfail, kpass, kskip and ktest are updated as follows: if the test passes, kpass is incremented by one; if the test fails, kfail is incremented by one; if the test is skipped, kskip is incremented by one. ktest is left unchanged.
.TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 ALGO (global input) HPL_T_palg * On entry, ALGO points to the data structure containing the algorithmic parameters to be used for this test. .TP 8 N (global input) const int On entry, N specifies the order of the coefficient matrix A. N must be at least zero. .TP 8 NB (global input) const int On entry, NB specifies the blocking factor used to partition and distribute the matrix A. NB must be larger than one. .SH SEE ALSO .BR HPL_pddriver \ (3), .BR HPL_pdinfo \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdtrsv.30000644000000000000000000000350711256503657013645 00000000000000.TH HPL_pdtrsv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdtrsv \- Solve triu( A ) x = b. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdtrsv(\fR \fB\&HPL_T_grid *\fR \fI\&GRID\fR, \fB\&HPL_T_pmat *\fR \fI\&AMAT\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdtrsv\fR solves an upper triangular system of linear equations. The rhs is the last column of the N by N+1 matrix A. The solve starts in the process column owning the Nth column of A, so the rhs b may need to be moved one process column to the left at the beginning. The routine therefore needs a column vector in every process column but the one owning b. The result is replicated in all process rows, and returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. The algorithm uses decreasing one-ring broadcast in process rows and columns implemented in terms of synchronous communication point to point primitives. The lookahead of depth 1 is used to minimize the critical path. This entire operation is essentially ``latency'' bound and an estimate of its running time is given by: (move rhs) lat + N / ( P bdwth ) + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + gam2 N^2 / ( P Q ), where gam2 is an estimate of the Level 2 BLAS rate of execution. There are N / NB diagonal blocks. 
One must exchange 2 messages of length NB to compute the next NB entries of the vector solution, as well as performing a total of N^2 floating point operations. .SH ARGUMENTS .TP 8 GRID (local input) HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 AMAT (local input/output) HPL_T_pmat * On entry, AMAT points to the data structure containing the local array information. .SH SEE ALSO .BR HPL_pdgesv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdupdateNN.30000644000000000000000000000264111256503657014363 00000000000000.TH HPL_pdupdateNN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdupdateNN \- Broadcast a panel and update the trailing submatrix. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdupdateNN(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdupdateNN\fR broadcast - forward the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing (using the panel PANEL) submatrix. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local output) int * On exit, IFLAG indicates whether or not the broadcast has been completed when PBCST is not NULL on entry. In that case, IFLAG is left unchanged. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be updated) information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be updated starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp01N \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_pdupdateNT.30000644000000000000000000000264111256503657014371 00000000000000.TH HPL_pdupdateNT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdupdateNT \- Broadcast a panel and update the trailing submatrix. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdupdateNT(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdupdateNT\fR broadcast - forward the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing (using the panel PANEL) submatrix. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local output) int * On exit, IFLAG indicates whether or not the broadcast has been completed when PBCST is not NULL on entry. In that case, IFLAG is left unchanged. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be updated) information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be updated starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdupdateTN.30000644000000000000000000000264111256503657014371 00000000000000.TH HPL_pdupdateTN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdupdateTN \- Broadcast a panel and update the trailing submatrix. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdupdateTN(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdupdateTN\fR broadcast - forward the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing (using the panel PANEL) submatrix. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local output) int * On exit, IFLAG indicates whether or not the broadcast has been completed when PBCST is not NULL on entry. In that case, IFLAG is left unchanged. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be updated) information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be updated starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp01N \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pdupdateTT.30000644000000000000000000000264111256503657014377 00000000000000.TH HPL_pdupdateTT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pdupdateTT \- Broadcast a panel and update the trailing submatrix. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pdupdateTT(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&NN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pdupdateTT\fR broadcast - forward the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing (using the panel PANEL) submatrix. 
.SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local output) int * On exit, IFLAG indicates whether or not the broadcast has been completed when PBCST is not NULL on entry. In that case, IFLAG is left unchanged. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be updated) information. .TP 8 NN (local input) const int On entry, NN specifies the local number of columns of the trailing submatrix to be updated starting at the current position. NN must be at least zero. .SH SEE ALSO .BR HPL_pdgesv \ (3), .BR HPL_pdgesv0 \ (3), .BR HPL_pdgesvK1 \ (3), .BR HPL_pdgesvK2 \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_perm.30000644000000000000000000000322711256503657013265 00000000000000.TH HPL_perm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_perm \- Combine 2 index arrays - Generate the permutation. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_perm(\fR \fB\&const int\fR \fI\&N\fR, \fB\&int *\fR \fI\&LINDXA\fR, \fB\&int *\fR \fI\&LINDXAU\fR, \fB\&int *\fR \fI\&IWORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_perm\fR combines two index arrays and generates the corresponding permutation. First, this function computes the inverse of LINDXA, and then combines it with LINDXAU. Second, in order to be able to perform the permutation in place, LINDXAU is overwritten by the sequence of permutations producing the same result. What we ultimately want to achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the call to this function, this in place permutation can be performed by for i in [0..N) swap U[i] with U[LINDXAU[i]]. .SH ARGUMENTS .TP 8 N (global input) const int On entry, N specifies the length of the arrays LINDXA and LINDXAU. N should be at least zero.
.TP 8 LINDXA (global input/output) int * On entry, LINDXA is an array of dimension N containing the source indexes. On exit, LINDXA contains the combined index array. .TP 8 LINDXAU (global input/output) int * On entry, LINDXAU is an array of dimension N containing the target indexes. On exit, LINDXAU contains the sequence of permutation, that should be applied in increasing order to permute the underlying array U in place. .TP 8 IWORK (workspace) int * On entry, IWORK is a workarray of dimension N. .SH SEE ALSO .BR HPL_plindx1 \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pipid.30000644000000000000000000000624511256503657013432 00000000000000.TH HPL_pipid 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pipid \- Simplify the pivot vector. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pipid(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&int *\fR \fI\&K\fR, \fB\&int *\fR \fI\&IPID\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pipid\fR computes an array IPID that contains the source and final destination of matrix rows resulting from the application of N interchanges as computed by the LU factorization with row partial pivoting. The array IPID is such that the row of global index IPID(i) should be mapped onto the row of global index IPID(i+1). Note that we cannot really know the length of IPID a priori. However, we know that this array is at least 2*N long, since there are N rows to swap and broadcast. The length of this array must be smaller than or equal to 4*N, since every row is swapped with at most a single distinct remote row. The algorithm constructing IPID goes as follows: Let IA be the global index of the first row to be swapped. For every row src IA + i with i in [0..N) to be swapped with row dst such that dst is given by DPIV[i]: Is row src the destination of a previous row of the current block, that is, is there k odd such that IPID(k) is equal to src ? 
Yes: update this destination with dst. For example, if the pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it was thought so far ... No : add the pair (src,dst) at the end of IPID; row src has not been moved yet. Is row dst different from src the destination of a previous row of the current block, i.e., is there k odd such that IPID(k) is equal to dst ? Yes: update IPID(k) with src. For example, if the pivot array is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought so far ... No : add the pair (dst,src) at the end of IPID; row dst has not been moved yet. Note that when src is equal to dst, the pair (dst,src) should not be added to IPID in order to avoid duplicated entries in this array. During the construction of the array IPID, we make sure that the first N entries are such that IPID(k) with k odd is equal to IA+k/2. For k in [0..K/2), the row of global index IPID(2*k) should be mapped onto the row of global index IPID(2*k+1). .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 K (global output) int * On exit, K specifies the number of entries in IPID. K is at least 2*N, and at most 4*N. .TP 8 IPID (global output) int * On entry, IPID is an array of length 4*N. On exit, the first K entries of that array contain the src and final destination resulting from the application of the N interchanges as specified by DPIV. The pairs (src,dst) are contiguously stored and sorted so that IPID(2*i+1) is equal to IA+i with i in [0..N) .SH SEE ALSO .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_plindx0.30000644000000000000000000001544311256503657013703 00000000000000.TH HPL_plindx0 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_plindx0 \- Compute local swapping index arrays. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_plindx0(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&K\fR, \fB\&int *\fR \fI\&IPID\fR, \fB\&int *\fR \fI\&LINDXA\fR, \fB\&int *\fR \fI\&LINDXAU\fR, \fB\&int *\fR \fI\&LLEN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_plindx0\fR computes two local arrays LINDXA and LINDXAU containing the local source and final destination position resulting from the application of row interchanges. On entry, the array IPID of length K is such that the row of global index IPID(i) should be mapped onto row of global index IPID(i+1). Let IA be the global index of the first row to be swapped. For k in [0..K/2), the row of global index IPID(2*k) should be mapped onto the row of global index IPID(2*k+1). The question then, is to determine which rows should ultimately be part of U. First, some rows of the process ICURROW may be swapped locally. One of this row belongs to U, the other one belongs to my local piece of A. The other rows of the current block are swapped with remote rows and are thus not part of U. These rows however should be sent along, and grabbed by the other processes as we progress in the exchange phase. So, assume that I am ICURROW and consider a row of index IPID(2*i) that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less than N, this row is locally swapped and should be copied into U at the position IPID(2*i+1) - IA. No row will be exchanged for this one. If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be locally copied into my local piece of A at the position corresponding to the row of global index IPID(2*i+1). 
If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) is to be swapped away and strictly speaking does not belong to U, but to A remotely. Since this process will however send this array U, this row is copied into U, exactly where the row IPID(2*i+1) should go. For this, we search IPID for k1, such that IPID(2*k1) is equal to IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position IPID(2*k1+1)-IA. It is thus important to put the rows that go into U, i.e., such that IPID(2*i+1) - IA is less than N at the begining of the array IPID. By doing so, U is formed, and the local copy is performed in just one sweep. Two lists LINDXA and LINDXAU are built. LINDXA contains the local index of the rows I have that should be copied. LINDXAU contains the local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) of A should be locally copied into A(-LINDXAU(k),:). In the process ICURROW, the initial packing algorithm proceeds as follows. for all entries in IPID, if IPID(2*i) is in ICURROW, if IPID(2*i+1) is in ICURROW, if( IPID(2*i+1) - IA < N ) save corresponding local position of this row (LINDXA); save local position (LINDXAU) in U where this row goes; [copy row IPID(2*i) in U at position IPID(2*i+1)-IA; ]; else save corresponding local position of this row (LINDXA); save local position (-LINDXAU) in A where this row goes; [copy row IPID(2*i) in my piece of A at IPID(2*i+1);] end if else find k1 such that IPID(2*k1) = IPID(2*i+1); copy row IPID(2*i) in U at position IPID(2*k1+1)-IA; save corresponding local position of this row (LINDXA); save local position (LINDXAU) in U where this row goes; end if end if end for Second, if I am not the current row process ICURROW, all source rows in IPID that I own are part of U. Indeed, they are swapped with one row of the current block of rows, and the main factorization algorithm proceeds one row after each other. 
The processes different from ICURROW, should exchange and accumulate those rows until they receive some data previously owned by the process ICURROW. In processes different from ICURROW, the initial packing algorithm proceeds as follows. Consider a row of global index IPID(2*i) that I own. When I will be receiving data previously owned by ICURROW, i.e., U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, and this particular row of U should be first copied into my piece of A, at A(il,:), where il is the local row index corresponding to IPID(2*i). Now,initially, this row will be packed into workspace, say as the kth row of that work array. The following algorithm sets LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row should be copied. LINDXA(k) stores the local index in A where this row of U should be copied, i.e il. for all entries in IPID, if IPID(2*i) is not in ICURROW, copy row IPID(2*i) in work array; save corresponding local position of this row (LINDXA); save position (LINDXAU) in U where this row should be copied; end if end for Since we are at it, we also globally figure out how many rows every process has. That is necessary, because it would rather be cumbersome to figure it on the fly during the bi-directional exchange phase. This information is kept in the array LLEN of size NPROW. Also note that the arrays LINDXA and LINDXAU are of max length equal to 2*N. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 K (global input) const int On entry, K specifies the number of entries in IPID. K is at least 2*N, and at most 4*N. .TP 8 IPID (global input) int * On entry, IPID is an array of length K. The first K entries of that array contain the src and final destination resulting from the application of the interchanges. .TP 8 LINDXA (local output) int * On entry, LINDXA is an array of dimension 2*N. 
On exit, this array contains the local indexes of the rows of A I have that should be copied into U. .TP 8 LINDXAU (local output) int * On entry, LINDXAU is an array of dimension 2*N. On exit, this array contains the local destination information encoded as follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) of A should be locally copied into A(-LINDXAU(k),:). .TP 8 LLEN (global output) int * On entry, LLEN is an array of length NPROW. On exit, it contains how many rows every process has. .SH SEE ALSO .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_plindx1.30000644000000000000000000000720511256503657013701 00000000000000.TH HPL_plindx1 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_plindx1 \- Compute local swapping index arrays. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_plindx1(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&K\fR, \fB\&const int *\fR \fI\&IPID\fR, \fB\&int *\fR \fI\&IPA\fR, \fB\&int *\fR \fI\&LINDXA\fR, \fB\&int *\fR \fI\&LINDXAU\fR, \fB\&int *\fR \fI\&IPLEN\fR, \fB\&int *\fR \fI\&IPMAP\fR, \fB\&int *\fR \fI\&IPMAPM1\fR, \fB\&int *\fR \fI\&PERMU\fR, \fB\&int *\fR \fI\&IWORK\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_plindx1\fR computes two local arrays LINDXA and LINDXAU containing the local source and final destination position resulting from the application of row interchanges. In addition, this function computes three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic mapping information for the spreading phase. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 K (global input) const int On entry, K specifies the number of entries in IPID. K is at least 2*N, and at most 4*N.
.TP 8 IPID (global input) const int * On entry, IPID is an array of length K. The first K entries of that array contain the src and final destination resulting from the application of the interchanges. .TP 8 IPA (global output) int * On exit, IPA specifies the number of rows that the current process row has that either belong to U or should be swapped with remote rows of A. .TP 8 LINDXA (global output) int * On entry, LINDXA is an array of dimension 2*N. On exit, this array contains the local indexes of the rows of A I have that should be copied into U. .TP 8 LINDXAU (global output) int * On entry, LINDXAU is an array of dimension 2*N. On exit, this array contains the local destination information encoded as follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) of A should be locally copied into A(-LINDXAU(k),:). .TP 8 IPLEN (global output) int * On entry, IPLEN is an array of dimension NPROW + 1. On exit, this array is such that IPLEN[i] is the number of rows of A in the processes before process IPMAP[i] after the sort with the convention that IPLEN[nprow] is the total number of rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the local number of rows of A that should be moved to the process IPMAP[i]. IPLEN is such that the number of rows of the source process row can be computed as IPLEN[1] - IPLEN[0], and the remaining entries of this array are sorted so that the quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. .TP 8 IPMAP (global output) int * On entry, IPMAP is an array of dimension NPROW. On exit, this array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the corresponding sorted process coordinate. .TP 8 IPMAPM1 (global output) int * On entry, IPMAPM1 is an array of dimension NPROW. On exit, this array contains the inverse of the logarithmic mapping contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in [0..
NPROW) .TP 8 PERMU (global output) int * On entry, PERMU is an array of dimension JB. On exit, PERMU contains a sequence of permutations, that should be applied in increasing order to permute in place the row panel U. .TP 8 IWORK (workspace) int * On entry, IWORK is a workarray of dimension 2*JB. .SH SEE ALSO .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_plindx10.30000644000000000000000000000442611256503657013763 00000000000000.TH HPL_plindx10 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_plindx10 \- Compute the logarithmic maps for the spreading. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_plindx10(\fR \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&K\fR, \fB\&const int *\fR \fI\&IPID\fR, \fB\&int *\fR \fI\&IPLEN\fR, \fB\&int *\fR \fI\&IPMAP\fR, \fB\&int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_plindx10\fR computes three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic mapping information for the spreading phase. .SH ARGUMENTS .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel information. .TP 8 K (global input) const int On entry, K specifies the number of entries in IPID. K is at least 2*N, and at most 4*N. .TP 8 IPID (global input) const int * On entry, IPID is an array of length K. The first K entries of that array contain the src and final destination resulting from the application of the interchanges. .TP 8 IPLEN (global output) int * On entry, IPLEN is an array of dimension NPROW + 1. On exit, this array is such that IPLEN[i] is the number of rows of A in the processes before process IPMAP[i] after the sort, with the convention that IPLEN[nprow] is the total number of rows. In other words, IPLEN[i+1] - IPLEN[i] is the local number of rows of A that should be moved for each process.
IPLEN is such that the number of rows of the source process row can be computed as IPLEN[1] - IPLEN[0], and the remaining entries of this array are sorted so that the quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. .TP 8 IPMAP (global output) int * On entry, IPMAP is an array of dimension NPROW. On exit, this array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the corresponding sorted process coordinate. .TP 8 IPMAPM1 (global output) int * On entry, IPMAPM1 is an array of dimension NPROW. On exit, this array contains the inverse of the logarithmic mapping contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in [0.. NPROW) .SH SEE ALSO .BR HPL_pdlaswp00N \ (3), .BR HPL_pdlaswp00T \ (3), .BR HPL_pdlaswp01N \ (3), .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pnum.30000644000000000000000000000207611256503657013302 00000000000000.TH HPL_pnum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pnum \- Rank determination. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_pnum(\fR \fB\&const HPL_T_grid *\fR \fI\&GRID\fR, \fB\&const int\fR \fI\&MYROW\fR, \fB\&const int\fR \fI\&MYCOL\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pnum\fR determines the rank of a process as a function of its coordinates in the grid. .SH ARGUMENTS .TP 8 GRID (local input) const HPL_T_grid * On entry, GRID points to the data structure containing the process grid information. .TP 8 MYROW (local input) const int On entry, MYROW specifies the row coordinate of the process whose rank is to be determined. MYROW must be greater than or equal to zero and less than NPROW. .TP 8 MYCOL (local input) const int On entry, MYCOL specifies the column coordinate of the process whose rank is to be determined. MYCOL must be greater than or equal to zero and less than NPCOL. .SH SEE ALSO .BR HPL_grid_init \ (3), .BR HPL_grid_info \ (3), .BR HPL_grid_exit \ (3). 
hpcc-1.4.1/hpl/man/man3/HPL_ptimer.30000644000000000000000000000233111256503657013615 00000000000000.TH HPL_ptimer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_ptimer \- Timer facility. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_ptimer(\fR \fB\&const int\fR \fI\&I\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_ptimer\fR provides a "stopwatch" functionality cpu/wall timer in seconds. Up to 64 separate timers can be functioning at once. The first call starts the timer, and the second stops it. This routine can be disabled by calling HPL_ptimer_disable(), so that calls to the timer are ignored. This feature can be used to make sure certain sections of code do not affect timings, even if they call routines which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable the timer functionality. One can retrieve the current value of a timer by calling t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) where I is the timer index in [0..64). To initialize the timer functionality, one must have called HPL_ptimer_boot() prior to any of the functions mentioned above. .SH ARGUMENTS .TP 8 I (global input) const int On entry, I specifies the timer to stop/start. .SH SEE ALSO .BR HPL_ptimer_cputime \ (3), .BR HPL_ptimer_walltime \ (3). hpcc-1.4.1/hpl/man/man3/HPL_ptimer_cputime.30000644000000000000000000000170711256503657015351 00000000000000.TH HPL_ptimer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_ptimer_cputime \- Return the CPU time. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_ptimer_cputime();\fR .SH DESCRIPTION \fB\&HPL_ptimer_cputime\fR returns the cpu time. If HPL_USE_CLOCK is defined, the clock() function is used to return an approximation of processor time used by the program. The value returned is the CPU time used so far as a clock_t; to get the number of seconds used, the result is divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C standard library.
If HPL_USE_TIMES is defined, the times() function is used instead. This function returns the current process times. times() returns the number of clock ticks that have elapsed since the system has been up. Otherwise and by default, the standard library function getrusage() is used. .SH SEE ALSO .BR HPL_ptimer_walltime \ (3), .BR HPL_ptimer \ (3). hpcc-1.4.1/hpl/man/man3/HPL_ptimer_walltime.30000644000000000000000000000060111256503657015511 00000000000000.TH HPL_ptimer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_ptimer_walltime \- Return the elapsed (wall-clock) time. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_ptimer_walltime();\fR .SH DESCRIPTION \fB\&HPL_ptimer_walltime\fR returns the elapsed (wall-clock) time. .SH SEE ALSO .BR HPL_ptimer_cputime \ (3), .BR HPL_ptimer \ (3). hpcc-1.4.1/hpl/man/man3/HPL_pwarn.30000644000000000000000000000221211256503657013442 00000000000000.TH HPL_pwarn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_pwarn \- displays an error message. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_pwarn(\fR \fB\&FILE *\fR \fI\&STREAM\fR, \fB\&int\fR \fI\&LINE\fR, \fB\&const char *\fR \fI\&SRNAME\fR, \fB\&const char *\fR \fI\&FORM\fR, \fB\&...\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_pwarn\fR displays an error message. .SH ARGUMENTS .TP 8 STREAM (local input) FILE * On entry, STREAM specifies the output stream. .TP 8 LINE (local input) int On entry, LINE specifies the line number in the file where the error has occurred. When LINE is not a positive line number, it is ignored. .TP 8 SRNAME (local input) const char * On entry, SRNAME should be the name of the routine calling this error handler. .TP 8 FORM (local input) const char * On entry, FORM specifies the format, i.e., how the subsequent arguments are converted for output. .TP 8 (local input) ... On entry, ... is the list of arguments to be printed within the format string.
.SH SEE ALSO .BR HPL_pabort \ (3), .BR HPL_fprintf \ (3). hpcc-1.4.1/hpl/man/man3/HPL_rand.30000644000000000000000000000206111256503657013241 00000000000000.TH HPL_rand 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_rand \- random number generator. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_rand();\fR .SH DESCRIPTION \fB\&HPL_rand\fR generates the next number in the random sequence. This function ensures that this number lies in the interval (-0.5, 0.5]. The static array irand contains the information (2 integers) required to generate the next number in the sequence X(n). This number is computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the constant d is the largest 64 bit positive integer. The array irand is then updated for the generation of the next number X(n+1) in the random sequence as follows X(n+1) = a * X(n) + c. The constants a and c should have been preliminarily stored in the arrays ias and ics as 2 pairs of integers. The initialization of ias, ics and irand is performed by the function HPL_setran. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3). hpcc-1.4.1/hpl/man/man3/HPL_recv.30000644000000000000000000000256411256503657013264 00000000000000.TH HPL_recv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_recv \- Receive a message. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_recv(\fR \fB\&double *\fR \fI\&RBUF\fR, \fB\&int\fR \fI\&RCOUNT\fR, \fB\&int\fR \fI\&SRC\fR, \fB\&int\fR \fI\&RTAG\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_recv\fR is a simple wrapper around MPI_Recv. Its main purpose is to allow for some experimentation / tuning of this simple routine. Successful completion is indicated by the returned error code HPL_SUCCESS. In the case of messages of length less than or equal to zero, this function returns immediately. 
.SH ARGUMENTS .TP 8 RBUF (local output) double * On entry, RBUF specifies the starting address of buffer to be received. .TP 8 RCOUNT (local input) int On entry, RCOUNT specifies the number of double precision entries in RBUF. RCOUNT must be at least zero. .TP 8 SRC (local input) int On entry, SRC specifies the rank of the sending process in the communication space defined by COMM. .TP 8 RTAG (local input) int On entry, RTAG specifies the message tag to be used for this communication operation. .TP 8 COMM (local input) MPI_Comm The MPI communicator identifying the communication space. .SH SEE ALSO .BR HPL_send \ (3), .BR HPL_sendrecv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_reduce.30000644000000000000000000000316411256503657013571 00000000000000.TH HPL_reduce 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_reduce \- Reduce operation. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_reduce(\fR \fB\&void *\fR \fI\&BUFFER\fR, \fB\&const int\fR \fI\&COUNT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR, \fB\&const HPL_T_OP \fR \fI\&OP\fR, \fB\&const int\fR \fI\&ROOT\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_reduce\fR performs a global reduce operation across all processes of a group. Note that the input buffer is used as a work array, so in all processes but the accumulating process the original data is corrupted. .SH ARGUMENTS .TP 8 BUFFER (local input/output) void * On entry, BUFFER points to the buffer to be reduced. On exit, and in process of rank ROOT this array contains the reduced data. This buffer is also used as workspace during the operation in the other processes of the group. .TP 8 COUNT (global input) const int On entry, COUNT indicates the number of entries in BUFFER. COUNT must be at least zero. .TP 8 DTYPE (global input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffers operands. .TP 8 OP (global input) const HPL_T_OP On entry, OP is a pointer to the local combine function.
.TP 8 ROOT (global input) const int On entry, ROOT is the coordinate of the accumulating process. .TP 8 COMM (global/local input) MPI_Comm The MPI communicator identifying the process collection. .SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_all_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_min \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_rollN.30000644000000000000000000000457211256503657013414 00000000000000.TH HPL_rollN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_rollN \- Roll U and forward the column panel. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_rollN(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&IPLEN\fR, \fB\&const int *\fR \fI\&IPMAP\fR, \fB\&const int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_rollN\fR rolls the local arrays containing the local pieces of U, so that on exit to this function U is replicated in every process row. In addition, this function probes for the presence of the column panel and forwards it when available. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be rolled) information. .TP 8 N (local input) const int On entry, N specifies the number of columns of U. N must be at least zero. .TP 8 U (local input/output) double * On entry, U is an array of dimension (LDU,*) containing the local pieces of U in each process row.
.TP 8 LDU (local input) const int On entry, LDU specifies the local leading dimension of U. LDU should be at least MAX(1,IPLEN[NPROW]). .TP 8 IPLEN (global input) const int * On entry, IPLEN is an array of dimension NPROW+1. This array is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U in each process row. .TP 8 IPMAP (global input) const int * On entry, IPMAP is an array of dimension NPROW. This array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the absolute coordinate of the sorted process. .TP 8 IPMAPM1 (global input) const int * On entry, IPMAPM1 is an array of dimension NPROW. This array contains the inverse of the logarithmic mapping contained in IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. .SH SEE ALSO .BR HPL_pdlaswp01N \ (3). hpcc-1.4.1/hpl/man/man3/HPL_rollT.30000644000000000000000000000456211256503657013421 00000000000000.TH HPL_rollT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_rollT \- Roll U and forward the column panel. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_rollT(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int *\fR \fI\&IPLEN\fR, \fB\&const int *\fR \fI\&IPMAP\fR, \fB\&const int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_rollT\fR rolls the local arrays containing the local pieces of U, so that on exit to this function U is replicated in every process row. In addition, this function probes for the presence of the column panel and forwards it when available. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed.
If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be rolled) information. .TP 8 N (local input) const int On entry, N specifies the local number of rows of U. N must be at least zero. .TP 8 U (local input/output) double * On entry, U is an array of dimension (LDU,*) containing the local pieces of U in each process row. .TP 8 LDU (local input) const int On entry, LDU specifies the local leading dimension of U. LDU should be at least MAX(1,N). .TP 8 IPLEN (global input) const int * On entry, IPLEN is an array of dimension NPROW+1. This array is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U in each process row. .TP 8 IPMAP (global input) const int * On entry, IPMAP is an array of dimension NPROW. This array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the absolute coordinate of the sorted process. .TP 8 IPMAPM1 (global input) const int * On entry, IPMAPM1 is an array of dimension NPROW. This array contains the inverse of the logarithmic mapping contained in IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. .SH SEE ALSO .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_sdrv.30000644000000000000000000000354311256503657013301 00000000000000.TH HPL_sdrv 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_sdrv \- Send and receive a message. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_sdrv(\fR \fB\&double *\fR \fI\&SBUF\fR, \fB\&int\fR \fI\&SCOUNT\fR, \fB\&int\fR \fI\&STAG\fR, \fB\&double *\fR \fI\&RBUF\fR, \fB\&int\fR \fI\&RCOUNT\fR, \fB\&int\fR \fI\&RTAG\fR, \fB\&int\fR \fI\&PARTNER\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_sdrv\fR is a simple wrapper around MPI_Sendrecv. Its main purpose is to allow for some experimentation and tuning of this simple function.
Messages of length less than or equal to zero are not sent nor received. Successful completion is indicated by the returned error code HPL_SUCCESS. .SH ARGUMENTS .TP 8 SBUF (local input) double * On entry, SBUF specifies the starting address of buffer to be sent. .TP 8 SCOUNT (local input) int On entry, SCOUNT specifies the number of double precision entries in SBUF. SCOUNT must be at least zero. .TP 8 STAG (local input) int On entry, STAG specifies the message tag to be used for the sending communication operation. .TP 8 RBUF (local output) double * On entry, RBUF specifies the starting address of buffer to be received. .TP 8 RCOUNT (local input) int On entry, RCOUNT specifies the number of double precision entries in RBUF. RCOUNT must be at least zero. .TP 8 RTAG (local input) int On entry, RTAG specifies the message tag to be used for the receiving communication operation. .TP 8 PARTNER (local input) int On entry, PARTNER specifies the rank of the collaborative process in the communication space defined by COMM. .TP 8 COMM (local input) MPI_Comm The MPI communicator identifying the communication space. .SH SEE ALSO .BR HPL_send \ (3), .BR HPL_recv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_send.30000644000000000000000000000255611256503657013257 00000000000000.TH HPL_send 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_send \- Send a message. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&int\fR \fB\&HPL_send(\fR \fB\&double *\fR \fI\&SBUF\fR, \fB\&int\fR \fI\&SCOUNT\fR, \fB\&int\fR \fI\&DEST\fR, \fB\&int\fR \fI\&STAG\fR, \fB\&MPI_Comm\fR \fI\&COMM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_send\fR is a simple wrapper around MPI_Send. Its main purpose is to allow for some experimentation / tuning of this simple routine. Successful completion is indicated by the returned error code MPI_SUCCESS. In the case of messages of length less than or equal to zero, this function returns immediately. 
.SH ARGUMENTS .TP 8 SBUF (local input) double * On entry, SBUF specifies the starting address of buffer to be sent. .TP 8 SCOUNT (local input) int On entry, SCOUNT specifies the number of double precision entries in SBUF. SCOUNT must be at least zero. .TP 8 DEST (local input) int On entry, DEST specifies the rank of the receiving process in the communication space defined by COMM. .TP 8 STAG (local input) int On entry, STAG specifies the message tag to be used for this communication operation. .TP 8 COMM (local input) MPI_Comm The MPI communicator identifying the communication space. .SH SEE ALSO .BR HPL_recv \ (3), .BR HPL_sendrecv \ (3). hpcc-1.4.1/hpl/man/man3/HPL_setran.30000644000000000000000000000231611256503657013614 00000000000000.TH HPL_setran 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_setran \- Manage the random number generator. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_setran(\fR \fB\&const int\fR \fI\&OPTION\fR, \fB\&int *\fR \fI\&IRAN\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_setran\fR initializes the random generator with the encoding of the first number X(0) in the sequence, and the constants a and c used to compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), a and c are stored in the static variables irand, ias and ics. When OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the values of the input array IRAN. When OPTION is 3, IRAN is set to the current value of irand, and irand is then incremented. .SH ARGUMENTS .TP 8 OPTION (local input) const int On entry, OPTION is an integer that specifies the operations to be performed on the random generator as specified above. .TP 8 IRAN (local input/output) int * On entry, IRAN is an array of dimension 2, that contains the 16-lower and 15-higher bits of a random number. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_xjumpm \ (3), .BR HPL_jumpit \ (3), .BR HPL_rand \ (3).
hpcc-1.4.1/hpl/man/man3/HPL_spreadN.30000644000000000000000000000632611256503657013721 00000000000000.TH HPL_spreadN 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_spreadN \- Spread row panel U and forward current column panel. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_spreadN(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const enum HPL_SIDE\fR \fI\&SIDE\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int\fR \fI\&SRCDIST\fR, \fB\&const int *\fR \fI\&IPLEN\fR, \fB\&const int *\fR \fI\&IPMAP\fR, \fB\&const int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_spreadN\fR spreads the local array containing local pieces of U, so that on exit to this function, a piece of U is contained in every process row. The array IPLEN contains the number of rows of U, that should be spread on any given process row. This function also probes for the presence of the column panel PBCST. In case of success, this panel will be forwarded. If PBCST is NULL on input, this probing mechanism will be disabled. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be spread) information. .TP 8 SIDE (global input) const enum HPL_SIDE On entry, SIDE specifies whether the local piece of U located in process IPMAP[SRCDIST] should be spread to the right or to the left. This feature is used by the equilibration process. .TP 8 N (global input) const int On entry, N specifies the local number of columns of U. 
N must be at least zero. .TP 8 U (local input/output) double * On entry, U is an array of dimension (LDU,*) containing the local pieces of U. .TP 8 LDU (local input) const int On entry, LDU specifies the local leading dimension of U. LDU should be at least MAX(1,IPLEN[nprow]). .TP 8 SRCDIST (local input) const int On entry, SRCDIST specifies the source process that spreads its piece of U. .TP 8 IPLEN (global input) const int * On entry, IPLEN is an array of dimension NPROW+1. This array is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U in each process before process IPMAP[i], with the convention that IPLEN[nprow] is the total number of rows. In other words IPLEN[i+1] - IPLEN[i] is the local number of rows of U that should be moved to process IPMAP[i]. .TP 8 IPMAP (global input) const int * On entry, IPMAP is an array of dimension NPROW. This array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the absolute coordinate of the sorted process. .TP 8 IPMAPM1 (global input) const int * On entry, IPMAPM1 is an array of dimension NPROW. This array contains the inverse of the logarithmic mapping contained in IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. .SH SEE ALSO .BR HPL_pdlaswp01N \ (3). hpcc-1.4.1/hpl/man/man3/HPL_spreadT.30000644000000000000000000000631711256503657013727 00000000000000.TH HPL_spreadT 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_spreadT \- Spread row panel U and forward current column panel. 
.SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_spreadT(\fR \fB\&HPL_T_panel *\fR \fI\&PBCST\fR, \fB\&int *\fR \fI\&IFLAG\fR, \fB\&HPL_T_panel *\fR \fI\&PANEL\fR, \fB\&const enum HPL_SIDE\fR \fI\&SIDE\fR, \fB\&const int\fR \fI\&N\fR, \fB\&double *\fR \fI\&U\fR, \fB\&const int\fR \fI\&LDU\fR, \fB\&const int\fR \fI\&SRCDIST\fR, \fB\&const int *\fR \fI\&IPLEN\fR, \fB\&const int *\fR \fI\&IPMAP\fR, \fB\&const int *\fR \fI\&IPMAPM1\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_spreadT\fR spreads the local array containing local pieces of U, so that on exit to this function, a piece of U is contained in every process row. The array IPLEN contains the number of columns of U, that should be spread on any given process row. This function also probes for the presence of the column panel PBCST. If available, this panel will be forwarded. If PBCST is NULL on input, this probing mechanism will be disabled. .SH ARGUMENTS .TP 8 PBCST (local input/output) HPL_T_panel * On entry, PBCST points to the data structure containing the panel (to be broadcast) information. .TP 8 IFLAG (local input/output) int * On entry, IFLAG indicates whether or not the broadcast has already been completed. If not, probing will occur, and the outcome will be contained in IFLAG on exit. .TP 8 PANEL (local input/output) HPL_T_panel * On entry, PANEL points to the data structure containing the panel (to be spread) information. .TP 8 SIDE (global input) const enum HPL_SIDE On entry, SIDE specifies whether the local piece of U located in process IPMAP[SRCDIST] should be spread to the right or to the left. This feature is used by the equilibration process. .TP 8 N (global input) const int On entry, N specifies the local number of rows of U. N must be at least zero. .TP 8 U (local input/output) double * On entry, U is an array of dimension (LDU,*) containing the local pieces of U. .TP 8 LDU (local input) const int On entry, LDU specifies the local leading dimension of U. LDU should be at least MAX(1,N). 
.TP 8 SRCDIST (local input) const int On entry, SRCDIST specifies the source process that spreads its piece of U. .TP 8 IPLEN (global input) const int * On entry, IPLEN is an array of dimension NPROW+1. This array is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U in each process before process IPMAP[i], with the convention that IPLEN[nprow] is the total number of rows. In other words IPLEN[i+1] - IPLEN[i] is the local number of rows of U that should be moved to process IPMAP[i]. .TP 8 IPMAP (global input) const int * On entry, IPMAP is an array of dimension NPROW. This array contains the logarithmic mapping of the processes. In other words, IPMAP[myrow] is the absolute coordinate of the sorted process. .TP 8 IPMAPM1 (global input) const int * On entry, IPMAPM1 is an array of dimension NPROW. This array contains the inverse of the logarithmic mapping contained in IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i. .SH SEE ALSO .BR HPL_pdlaswp01T \ (3). hpcc-1.4.1/hpl/man/man3/HPL_sum.30000644000000000000000000000215111256503657013121 00000000000000.TH HPL_sum 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_sum \- Combine (sum) two buffers. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_sum(\fR \fB\&const int\fR \fI\&N\fR, \fB\&const void *\fR \fI\&IN\fR, \fB\&void *\fR \fI\&INOUT\fR, \fB\&const HPL_T_TYPE\fR \fI\&DTYPE\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_sum\fR combines (sum) two buffers. .SH ARGUMENTS .TP 8 N (input) const int On entry, N specifies the length of the buffers to be combined. N must be at least zero. .TP 8 IN (input) const void * On entry, IN points to the input-only buffer to be combined. .TP 8 INOUT (input/output) void * On entry, INOUT points to the input-output buffer to be combined. On exit, the entries of this array contains the combined results. .TP 8 DTYPE (input) const HPL_T_TYPE On entry, DTYPE specifies the type of the buffers operands. 
.SH SEE ALSO .BR HPL_broadcast \ (3), .BR HPL_reduce \ (3), .BR HPL_all_reduce \ (3), .BR HPL_barrier \ (3), .BR HPL_min \ (3), .BR HPL_max \ (3), .BR HPL_sum \ (3). hpcc-1.4.1/hpl/man/man3/HPL_timer.30000644000000000000000000000232311256503657013436 00000000000000.TH HPL_timer 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_timer \- Timer facility. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_timer(\fR \fB\&const int\fR \fI\&I\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_timer\fR provides a "stopwatch" functionality cpu/wall timer in seconds. Up to 64 separate timers can be functioning at once. The first call starts the timer, and the second stops it. This routine can be disabled by calling HPL_timer_disable(), so that calls to the timer are ignored. This feature can be used to make sure certain sections of code do not affect timings, even if they call routines which have HPL_timer calls in them. HPL_timer_enable() will re-enable the timer functionality. One can retrieve the current value of a timer by calling t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) where I is the timer index in [0..64). To initialize the timer functionality, one must have called HPL_timer_boot() prior to any of the functions mentioned above. .SH ARGUMENTS .TP 8 I (global input) const int On entry, I specifies the timer to stop/start. .SH SEE ALSO .BR HPL_timer_cputime \ (3), .BR HPL_timer_walltime \ (3). hpcc-1.4.1/hpl/man/man3/HPL_timer_cputime.30000644000000000000000000000170211256503657015164 00000000000000.TH HPL_timer_cputime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_timer_cputime \- Return the CPU time. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_timer_cputime();\fR .SH DESCRIPTION \fB\&HPL_timer_cputime\fR returns the cpu time. If HPL_USE_CLOCK is defined, the clock() function is used to return an approximation of processor time used by the program.
The value returned is the CPU time used so far as a clock_t; to get the number of seconds used, the result is divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C standard library. If HPL_USE_TIMES is defined, the times() function is used instead. This function returns the current process times. times() returns the number of clock ticks that have elapsed since the system has been up. Otherwise and by default, the standard library function getrusage() is used. .SH SEE ALSO .BR HPL_timer_walltime \ (3), .BR HPL_timer \ (3). hpcc-1.4.1/hpl/man/man3/HPL_timer_walltime.30000644000000000000000000000057311256503657015341 00000000000000.TH HPL_timer_walltime 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_timer_walltime \- Return the elapsed (wall-clock) time. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&double\fR \fB\&HPL_timer_walltime();\fR .SH DESCRIPTION \fB\&HPL_timer_walltime\fR returns the elapsed (wall-clock) time. .SH SEE ALSO .BR HPL_timer_cputime \ (3), .BR HPL_timer \ (3). hpcc-1.4.1/hpl/man/man3/HPL_warn.30000644000000000000000000000250011256503657013262 00000000000000.TH HPL_warn 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_warn \- displays an error message. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_warn(\fR \fB\&FILE *\fR \fI\&STREAM\fR, \fB\&int\fR \fI\&LINE\fR, \fB\&const char *\fR \fI\&SRNAME\fR, \fB\&const char *\fR \fI\&FORM\fR, \fB\&...\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_warn\fR displays an error message. .SH ARGUMENTS .TP 8 STREAM (local input) FILE * On entry, STREAM specifies the output stream. .TP 8 LINE (local input) int On entry, LINE specifies the line number in the file where the error has occurred. When LINE is not a positive line number, it is ignored. .TP 8 SRNAME (local input) const char * On entry, SRNAME should be the name of the routine calling this error handler.
.TP 8 FORM (local input) const char * On entry, FORM specifies the format, i.e., how the subsequent arguments are converted for output. .TP 8 (local input) ... On entry, ... is the list of arguments to be printed within the format string. .SH EXAMPLE \fI\&#include "hpl.h"\fR int main(int argc, char *argv[]) .br { .br HPL_warn( stderr, __LINE__, __FILE__, .br "Demo.\en" ); .br exit(0); return(0); .br } .SH SEE ALSO .BR HPL_abort \ (3), .BR HPL_fprintf \ (3). hpcc-1.4.1/hpl/man/man3/HPL_xjumpm.30000644000000000000000000000514611256503657013644 00000000000000.TH HPL_xjumpm 3 "September 10, 2008" "HPL 2.0" "HPL Library Functions" .SH NAME HPL_xjumpm \- Compute constants to jump in the random sequence. .SH SYNOPSIS \fB\&#include "hpl.h"\fR \fB\&void\fR \fB\&HPL_xjumpm(\fR \fB\&const int\fR \fI\&JUMPM\fR, \fB\&int *\fR \fI\&MULT\fR, \fB\&int *\fR \fI\&IADD\fR, \fB\&int *\fR \fI\&IRANN\fR, \fB\&int *\fR \fI\&IRANM\fR, \fB\&int *\fR \fI\&IAM\fR, \fB\&int *\fR \fI\&ICM\fR \fB\&);\fR .SH DESCRIPTION \fB\&HPL_xjumpm\fR computes the constants A and C to jump JUMPM numbers in the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in MULT and IADD specify how to jump from one entry in the sequence to the next. .SH ARGUMENTS .TP 8 JUMPM (local input) const int On entry, JUMPM specifies the number of entries in the sequence to jump over. When JUMPM is less than or equal to zero, A and C are not computed, IRANM is set to IRANN corresponding to a jump of size zero. .TP 8 MULT (local input) int * On entry, MULT is an array of dimension 2, that contains the 16-lower and 15-higher bits of the constant a to jump from X(n) to X(n+1) = a*X(n) + c in the random sequence. .TP 8 IADD (local input) int * On entry, IADD is an array of dimension 2, that contains the 16-lower and 15-higher bits of the constant c to jump from X(n) to X(n+1) = a*X(n) + c in the random sequence. .TP 8 IRANN (local input) int * On entry, IRANN is an array of dimension 2,
that contains the 16-lower and 15-higher bits of the encoding of X(n). .TP 8 IRANM (local output) int * On entry, IRANM is an array of dimension 2. On exit, this array contains respectively the 16-lower and 15-higher bits of the encoding of X(n+JUMPM). .TP 8 IAM (local output) int * On entry, IAM is an array of dimension 2. On exit, when JUMPM is greater than zero, this array contains the encoded constant A to jump from X(n) to X(n+JUMPM) in the random sequence. IAM(0:1) contains respectively the 16-lower and 15-higher bits of this constant A. When JUMPM is less than or equal to zero, this array is not referenced. .TP 8 ICM (local output) int * On entry, ICM is an array of dimension 2. On exit, when JUMPM is greater than zero, this array contains the encoded constant C to jump from X(n) to X(n+JUMPM) in the random sequence. ICM(0:1) contains respectively the 16-lower and 15-higher bits of this constant C. When JUMPM is less than or equal to zero, this array is not referenced. .SH SEE ALSO .BR HPL_ladd \ (3), .BR HPL_lmul \ (3), .BR HPL_setran \ (3), .BR HPL_jumpit \ (3), .BR HPL_rand \ (3). hpcc-1.4.1/hpl/setup/Make.CrayX10000644000000000000000000002070511256503657013170 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2004 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3.
All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - MPI directories - library ------------------------------------------ # ---------------------------------------------------------------------- # MPIinc tells the C compiler where to find the MPI header files, MPIlib # is defined to be the name of the MPI library to be used. The variables # MPIdir and MPIplat are only used for defining MPIinc and MPIlib). 
# MPIdir = MPIplat = # MPIinc = MPIlib = # # ---------------------------------------------------------------------- # - BLAS library ------------------------------------------------------- # ---------------------------------------------------------------------- # BLASlib = # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc) HPL_LIBS = $(HPLlib) $(BLASlib) # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS F77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) -hlist=m -DLONG_IS_64BITS CCFLAGS = $(HPL_DEFS) -O2 -hlist=m -DLONG_IS_64BITS -hrestrict=a # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = cc LINKFLAGS = $(CCFLAGS) -Wl,-LD_LAYOUT:segalign=0x10000000 # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.FreeBSD_PIV_CBLAS0000644000000000000000000002153011256503657014712 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpich MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a $(MPdir)/lib/libpmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/share/ATLAS/lib/FreeBSD_P5SSE2 LAinc = LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_CBLAS # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = /usr/bin/gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = /usr/bin/f77 LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = /usr/bin/ranlib # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.HPUX_FBLAS0000644000000000000000000002101411256503657013546 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - MPI directories - library ------------------------------------------ # ---------------------------------------------------------------------- # MPIinc tells the C compiler where to find the MPI header files, MPIlib # is defined to be the name of the MPI library to be used. The variables # MPIdir and MPIplat are only used for defining MPIinc and MPIlib.
# MPIdir = $(HOME)/local/mpi MPIplat = $(MPIdir)/hpux/ch_p4 # MPIinc = -I$(MPIdir)/include -I$(MPIplat)/include MPIlib = $(MPIplat)/lib/libmpich.a # # ---------------------------------------------------------------------- # - BLAS library ------------------------------------------------------- # ---------------------------------------------------------------------- # BLASlib = /usr/lib/pa1.1/libblas.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
# # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. # F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(MPIinc) HPL_LIBS = $(HPLlib) $(BLASlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS F77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(HPL_INCLUDES) $(F2CDEFS) $(HPL_OPTS) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -D_INCLUDE_POSIX_SOURCE -DUseTimes -Aa +O4 # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. 
# LINKER = cc LINKFLAGS = -Aa # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.I860_FBLAS0000644000000000000000000002104411256503657013413 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. 
# MPdir = MPinc = MPlib = -lmpi # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. # LAdir = LAinc = LAlib = -lkmath # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. 
# # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. # F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. 
# HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) -nx CCFLAGS = $(HPL_DEFS) -O4 -nx # LINKER = f77 LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.IRIX_FBLAS0000644000000000000000000002134111256503657013540 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. 
# # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. 
INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = $(HOME)/local/mpi MPinc = -I$(MPdir)/include -I$(MPdir)/IRIX64/ch_p4/include MPlib = $(MPdir)/IRIX64/ch_p4/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. # LAdir = LAinc = LAlib = -lblas # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. 
# # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. # F2CDEFS = -DAdd_ -DStringSunStyle -DF77_INTEGER=int # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. 
# HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) -64 CCFLAGS = $(HPL_DEFS) -O3 -64 -OPT:Olimit=15000 -TARG:platform=IP30 \ -LNO:blocking=OFF -LOPT:alias=typed # LINKER = cc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.LinuxIntelIA64Itan2_eccMKL0000644000000000000000000002153211256503657016613 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2004 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. 
The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. # ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. 
INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. # LAdir = LAinc = LAlib = -lmkl_i2p -lpthread -lguide # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. 
# # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. # F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. 
# HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # `mpicc' should be a wrapper around Intel `ecc' compiler # (type of CPU could be -mcpu=itanium or -mcpu=itanium2) CC = mpicc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O3 -fno-alias -Wall -mcpu=itanium2 # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpicc LINKFLAGS = -L/usr/local/mkl-6.0.12/mkl60/lib/64 # ARCHIVER = ar ARFLAGS = rc RANLIB = ranlib # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_ATHLON_CBLAS0000644000000000000000000002127411256503657015133 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. 
All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - MPI directories - library ------------------------------------------ # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON LAinc = LAlib = $(LAdir)/libcblas.a $(LAdir)/libatlas.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the Fortran 77 BLAS interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_CBLAS # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = /usr/bin/gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall # LINKER = /usr/bin/gcc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_ATHLON_FBLAS0000644000000000000000000002133311256503657015132 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1.
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/netlib/ARCHIVES/Linux_ATHLON LAinc = LAlib = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = /usr/bin/gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall # LINKER = /usr/bin/g77 LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_ATHLON_VSIPL0000644000000000000000000002127711256503657015207 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - MPI directories - library ------------------------------------------ # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = /home/software/TASP_VSIPL_Core_Plus LAinc = -I$(LAdir)/include LAlib = $(LAdir)/lib/libvsip_c.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the Fortran 77 BLAS interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_VSIPL # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = /usr/bin/gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall # LINKER = /usr/bin/gcc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_AtlasCBLAS_Lam0000644000000000000000000002137511256503657015646 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1.
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr MPinc = -I$(MPdir)/include MPlib = -lmpi -llam # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/netlib/ARCHIVES/Linux_PII LAinc = LAlib = -lcblas -latlas # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_CBLAS # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -pipe # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = gcc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = rc RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_AtlasFBLAS_Lam0000644000000000000000000002143011256503657015641 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build settings for this platform: shell tools, directory layout, MPI
# and linear algebra libraries, and compiler / linker / archiver flags.
# Each assignment sits on its own line so that make does not treat it
# as part of a preceding comment.
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
SHELL        = /bin/sh
#
CD           = cd
CP           = cp
LN_S         = ln -s
MKDIR        = mkdir
RM           = /bin/rm -f
TOUCH        = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
TOPdir       = ../../..
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
#
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
MPdir        = /usr
MPinc        = -I$(MPdir)/include
MPlib        = -lmpi -llam
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
# NOTE(review): in this configuration LAlib uses -l flags only and does
# not reference LAdir, so the libraries must be on the linker's default
# search path.
#
LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
LAinc        =
LAlib        = -lf77blas -latlas
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_              : all lower case and a suffixed underscore (Suns,
#                       Intel, ...), [default]
# -DNoChange          : all lower case (IBM RS6000),
# -DUpCase            : all upper case (Cray),
# -DAdd__             : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle    : The string address is passed at the string
#                       location on the stack, and the string length is
#                       then passed as an F77_INTEGER after all explicit
#                       stack arguments, [default]
# -DStringStructPtr   : The address of a structure is passed by a
#                       Fortran 77 string, and the structure is of the
#                       form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal   : A structure is passed by value for each Fortran
#                       77 string, and the structure is of the form:
#                       struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle   : Special option for Cray machines, which use
#                       Cray fcd (fortran character descriptor) for
#                       interoperation.
#
F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lm
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
HPL_OPTS     =
#
# ----------------------------------------------------------------------
#
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
CC           = gcc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall -pipe
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = g77
LINKFLAGS    = $(CCFLAGS)
#
ARCHIVER     = ar
ARFLAGS      = rc
RANLIB       = echo
#
# ----------------------------------------------------------------------
hpcc-1.4.1/hpl/setup/Make.Linux_PII_CBLAS0000644000000000000000000002145511256503657014570 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build settings for this platform: shell tools, directory layout, MPI
# and linear algebra libraries, and compiler / linker / archiver flags.
# Each assignment sits on its own line so that make does not treat it
# as part of a preceding comment.
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
SHELL        = /bin/sh
#
CD           = cd
CP           = cp
LN_S         = ln -s
MKDIR        = mkdir
RM           = /bin/rm -f
TOUCH        = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
TOPdir       = ../../..
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
#
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
MPdir        = /usr/local/mpi
MPinc        = -I$(MPdir)/include
MPlib        = $(MPdir)/lib/libmpich.a
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
LAinc        =
LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_              : all lower case and a suffixed underscore (Suns,
#                       Intel, ...), [default]
# -DNoChange          : all lower case (IBM RS6000),
# -DUpCase            : all upper case (Cray),
# -DAdd__             : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle    : The string address is passed at the string
#                       location on the stack, and the string length is
#                       then passed as an F77_INTEGER after all explicit
#                       stack arguments, [default]
# -DStringStructPtr   : The address of a structure is passed by a
#                       Fortran 77 string, and the structure is of the
#                       form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal   : A structure is passed by value for each Fortran
#                       77 string, and the structure is of the form:
#                       struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle   : Special option for Cray machines, which use
#                       Cray fcd (fortran character descriptor) for
#                       interoperation.
#
# Left empty here: HPL_OPTS below selects -DHPL_CALL_CBLAS, i.e. the
# cblas interface rather than the Fortran 77 one.
#
F2CDEFS      =
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lm
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
HPL_OPTS     = -DHPL_CALL_CBLAS
#
# ----------------------------------------------------------------------
#
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
CC           = /usr/bin/gcc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = /usr/bin/g77
LINKFLAGS    = $(CCFLAGS)
#
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo
#
# ----------------------------------------------------------------------
hpcc-1.4.1/hpl/setup/Make.Linux_PII_CBLAS_gm0000644000000000000000000002135711256503657015254 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build settings for this platform: shell tools, directory layout, MPI
# and linear algebra libraries, and compiler / linker / archiver flags.
# Each assignment sits on its own line so that make does not treat it
# as part of a preceding comment.
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
SHELL        = /bin/sh
#
CD           = cd
CP           = cp
LN_S         = ln -s
MKDIR        = mkdir
RM           = /bin/rm -f
TOUCH        = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
TOPdir       = ../../..
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
#
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
# All three are left empty in this configuration. CC and LINKER below
# are mpicc / mpif77, which presumably supply the MPI include and
# library paths themselves -- TODO confirm for the local installation.
#
MPdir        =
MPinc        =
MPlib        =
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
LAinc        =
LAlib        = $(LAdir)/libcblas.a $(LAdir)/libatlas.a
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_              : all lower case and a suffixed underscore (Suns,
#                       Intel, ...), [default]
# -DNoChange          : all lower case (IBM RS6000),
# -DUpCase            : all upper case (Cray),
# -DAdd__             : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle    : The string address is passed at the string
#                       location on the stack, and the string length is
#                       then passed as an F77_INTEGER after all explicit
#                       stack arguments, [default]
# -DStringStructPtr   : The address of a structure is passed by a
#                       Fortran 77 string, and the structure is of the
#                       form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal   : A structure is passed by value for each Fortran
#                       77 string, and the structure is of the form:
#                       struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle   : Special option for Cray machines, which use
#                       Cray fcd (fortran character descriptor) for
#                       interoperation.
#
# Left empty here: HPL_OPTS below selects -DHPL_CALL_CBLAS, i.e. the
# cblas interface rather than the Fortran 77 one.
#
F2CDEFS      =
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lm
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
HPL_OPTS     = -DHPL_CALL_CBLAS
#
# ----------------------------------------------------------------------
#
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
CC           = mpicc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = mpif77
LINKFLAGS    = $(CCFLAGS)
#
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo
#
# ----------------------------------------------------------------------
hpcc-1.4.1/hpl/setup/Make.Linux_PII_FBLAS0000644000000000000000000002152111256503657014565 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build settings for this platform: shell tools, directory layout, MPI
# and linear algebra libraries, and compiler / linker / archiver flags.
# Each assignment sits on its own line so that make does not treat it
# as part of a preceding comment.
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
SHELL        = /bin/sh
#
CD           = cd
CP           = cp
LN_S         = ln -s
MKDIR        = mkdir
RM           = /bin/rm -f
TOUCH        = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
TOPdir       = ../../..
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
#
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
MPdir        = /usr/local/mpi
MPinc        = -I$(MPdir)/include
MPlib        = $(MPdir)/lib/libmpich.a
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
LAinc        =
LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_              : all lower case and a suffixed underscore (Suns,
#                       Intel, ...), [default]
# -DNoChange          : all lower case (IBM RS6000),
# -DUpCase            : all upper case (Cray),
# -DAdd__             : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle    : The string address is passed at the string
#                       location on the stack, and the string length is
#                       then passed as an F77_INTEGER after all explicit
#                       stack arguments, [default]
# -DStringStructPtr   : The address of a structure is passed by a
#                       Fortran 77 string, and the structure is of the
#                       form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal   : A structure is passed by value for each Fortran
#                       77 string, and the structure is of the form:
#                       struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle   : Special option for Cray machines, which use
#                       Cray fcd (fortran character descriptor) for
#                       interoperation.
#
F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lm
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
HPL_OPTS     =
#
# ----------------------------------------------------------------------
#
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
CC           = /usr/bin/gcc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = /usr/bin/g77
LINKFLAGS    = $(CCFLAGS)
#
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo
#
# ----------------------------------------------------------------------
hpcc-1.4.1/hpl/setup/Make.Linux_PII_FBLAS_gm0000644000000000000000000002141211256503657015247 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ######################################################################
#
# Build settings for this platform: shell tools, directory layout, MPI
# and linear algebra libraries, and compiler / linker / archiver flags.
# Each assignment sits on its own line so that make does not treat it
# as part of a preceding comment.
#
# ----------------------------------------------------------------------
# - shell --------------------------------------------------------------
# ----------------------------------------------------------------------
#
SHELL        = /bin/sh
#
CD           = cd
CP           = cp
LN_S         = ln -s
MKDIR        = mkdir
RM           = /bin/rm -f
TOUCH        = touch
#
# ----------------------------------------------------------------------
# - Platform identifier ------------------------------------------------
# ----------------------------------------------------------------------
#
ARCH         = $(arch)
#
# ----------------------------------------------------------------------
# - HPL Directory Structure / HPL library ------------------------------
# ----------------------------------------------------------------------
#
TOPdir       = ../../..
INCdir       = $(TOPdir)/include
BINdir       = $(TOPdir)/bin/$(ARCH)
LIBdir       = $(TOPdir)/lib/$(ARCH)
#
HPLlib       = $(LIBdir)/libhpl.a
#
# ----------------------------------------------------------------------
# - Message Passing library (MPI) --------------------------------------
# ----------------------------------------------------------------------
# MPinc tells the C compiler where to find the Message Passing library
# header files, MPlib is defined to be the name of the library to be
# used. The variable MPdir is only used for defining MPinc and MPlib.
#
# All three are left empty in this configuration. CC and LINKER below
# are mpicc / mpif77, which presumably supply the MPI include and
# library paths themselves -- TODO confirm for the local installation.
#
MPdir        =
MPinc        =
MPlib        =
#
# ----------------------------------------------------------------------
# - Linear Algebra library (BLAS or VSIPL) -----------------------------
# ----------------------------------------------------------------------
# LAinc tells the C compiler where to find the Linear Algebra library
# header files, LAlib is defined to be the name of the library to be
# used. The variable LAdir is only used for defining LAinc and LAlib.
#
LAdir        = $(HOME)/netlib/ARCHIVES/Linux_PII
LAinc        =
LAlib        = $(LAdir)/libf77blas.a $(LAdir)/libatlas.a
#
# ----------------------------------------------------------------------
# - F77 / C interface --------------------------------------------------
# ----------------------------------------------------------------------
# You can skip this section if and only if you are not planning to use
# a BLAS library featuring a Fortran 77 interface. Otherwise, it is
# necessary to fill out the F2CDEFS variable with the appropriate
# options. **One and only one** option should be chosen in **each** of
# the 3 following categories:
#
# 1) name space (How C calls a Fortran 77 routine)
#
# -DAdd_              : all lower case and a suffixed underscore (Suns,
#                       Intel, ...), [default]
# -DNoChange          : all lower case (IBM RS6000),
# -DUpCase            : all upper case (Cray),
# -DAdd__             : the FORTRAN compiler in use is f2c.
#
# 2) C and Fortran 77 integer mapping
#
# -DF77_INTEGER=int   : Fortran 77 INTEGER is a C int, [default]
# -DF77_INTEGER=long  : Fortran 77 INTEGER is a C long,
# -DF77_INTEGER=short : Fortran 77 INTEGER is a C short.
#
# 3) Fortran 77 string handling
#
# -DStringSunStyle    : The string address is passed at the string
#                       location on the stack, and the string length is
#                       then passed as an F77_INTEGER after all explicit
#                       stack arguments, [default]
# -DStringStructPtr   : The address of a structure is passed by a
#                       Fortran 77 string, and the structure is of the
#                       form: struct {char *cp; F77_INTEGER len;},
# -DStringStructVal   : A structure is passed by value for each Fortran
#                       77 string, and the structure is of the form:
#                       struct {char *cp; F77_INTEGER len;},
# -DStringCrayStyle   : Special option for Cray machines, which use
#                       Cray fcd (fortran character descriptor) for
#                       interoperation.
#
F2CDEFS      = -DAdd_ -DF77_INTEGER=int -DStringSunStyle
#
# ----------------------------------------------------------------------
# - HPL includes / libraries / specifics -------------------------------
# ----------------------------------------------------------------------
#
HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc)
HPL_LIBS     = $(HPLlib) $(LAlib) $(MPlib) -lm
#
# - Compile time options -----------------------------------------------
#
# -DHPL_COPY_L           force the copy of the panel L before bcast;
# -DHPL_CALL_CBLAS       call the cblas interface;
# -DHPL_CALL_VSIPL       call the vsip library;
# -DHPL_DETAILED_TIMING  enable detailed timers;
#
# By default HPL will:
#    *) not copy L before broadcast,
#    *) call the BLAS Fortran 77 interface,
#    *) not display detailed timing information.
#
HPL_OPTS     =
#
# ----------------------------------------------------------------------
#
HPL_DEFS     = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES)
#
# ----------------------------------------------------------------------
# - Compilers / linkers - Optimization flags ---------------------------
# ----------------------------------------------------------------------
#
CC           = mpicc
CCNOOPT      = $(HPL_DEFS)
CCFLAGS      = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall
#
# On some platforms, it is necessary to use the Fortran linker to find
# the Fortran internals used in the BLAS library.
#
LINKER       = mpif77
LINKFLAGS    = $(CCFLAGS)
#
ARCHIVER     = ar
ARFLAGS      = r
RANLIB       = echo
#
# ----------------------------------------------------------------------
hpcc-1.4.1/hpl/setup/Make.Linux_PII_VSIPL0000644000000000000000000002147411256503657014642 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P.
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = /home/software/TASP_VSIPL_Core_Plus LAinc = -I$(LAdir)/include LAlib = $(LAdir)/lib/libvsip_c.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_VSIPL # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = /usr/bin/gcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = /usr/bin/g77 LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_PII_VSIPL_gm0000644000000000000000000002136511256503657015324 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = /home/software/TASP_VSIPL_Core_Plus LAinc = -I$(LAdir)/include LAlib = $(LAdir)/lib/libvsip_c.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_VSIPL # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpicc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -fomit-frame-pointer -O3 -funroll-loops -W -Wall # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpif77 LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_SGI_AltixIA64_Goto0000644000000000000000000002132411256503657016375 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = -lmpi # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/scratch/lib LAinc = LAlib = $(LAdir)/libgoto_it2.so # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O2 -Wall -DXERBLA_MISSING # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = cc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = rc RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Linux_SGI_AltixIA64_SCSL0000644000000000000000000002123511256503657016232 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = -lmpi # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lscs # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O2 -Wall # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = cc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = rc RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.PWR2_FBLAS0000644000000000000000000002113411256503657013517 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lesslp2 # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc_r CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O3 -qarch=pwr2 -qtune=pwr2 -qmaxmem=-1 # LINKER = mpxlf_r LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.PWR3_FBLAS0000644000000000000000000002114411256503657013521 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lessl # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -qtune=pwr3 -qarch=pwr3 -O3 -qmaxmem=-1 -qfloat=hsflt # LINKER = mpxlf LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.PWRPC_FBLAS0000644000000000000000000002122211256503657013656 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include MPlib = $(MPdir)/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lessl # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc_r CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O3 -qarch=ppc -qtune=604 -qmaxmem=-1 # LINKER = mpxlf_r LINKFLAGS = -bmaxdata:0x70000000 $(CCFLAGS) # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Power4_ESSL0000644000000000000000000002132611256503657014067 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lessl # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -qtune=pwr4 -qarch=pwr4 -O3 -qmaxmem=-1 -qfloat=hsflt # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpxlf LINKFLAGS = -bmaxdata:0x70000000 # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Power4_ESSLSMP0000644000000000000000000002134311256503657014446 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lesslsmp # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed for each # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation.
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc_r -q64 -qarch=power4 CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -qtune=pwr4 -O3 -qhot -qsmp=omp # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpcc_r -q64 -qarch=power4 LINKFLAGS = -qsmp # ARCHIVER = ar ARFLAGS = -r -X64 RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Power4_ESSL_r0000644000000000000000000002132011256503657014402 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lessl # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DNoChange -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc_r -q64 -qarch=power4 CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -qtune=pwr4 -O3 -qhot # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpcc_r -q64 -qarch=power4 LINKFLAGS = # ARCHIVER = ar ARFLAGS = -r -X64 RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.SUN4SOL2-g_FBLAS0000644000000000000000000002124311256503657014403 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = $(HOME)/local/mpi MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -xlic_lib=sunperf # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -g # LINKER = purify -best-effort f77 LINKFLAGS = # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.SUN4SOL2-g_VSIPL0000644000000000000000000002130611256503657014451 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = $(HOME)/local/mpi MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = $(HOME)/local/TASP_VSIPL_Core_Plus LAinc = -I$(LAdir)/include LAlib = $(LAdir)/lib/libvsip_c.a # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = -DHPL_CALL_VSIPL # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -g # LINKER = purify -best-effort cc LINKFLAGS = # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.SUN4SOL2_FBLAS0000644000000000000000000002133611256503657014162 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = $(HOME)/local/mpi MPinc = -I$(MPdir)/include -I$(MPdir)/solaris/ch_p4/include MPlib = $(MPdir)/solaris/ch_p4/lib/libmpich.a -lsocket -lnsl # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -xlic_lib=sunperf # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -dalign -fsingle -xO5 -native -xarch=v8plusa # LINKER = f77 LINKFLAGS = -dalign -native -xarch=v8plusa -xO5 # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Sun0000644000000000000000000002125411256503657012626 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2004 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = -lmpi # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -xlic_lib=sunperf # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpcc CCNOOPT = -xarch=v9b CCFLAGS = -fast -xarch=v9b # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpcc LINKFLAGS = -fast -xarch=v9b # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.T3E_FBLAS0000644000000000000000000002156711256503657013372 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. # F2CDEFS = -DUpCase -DF77_INTEGER=long -DStringCrayStyle \ -DCRAY_BLAS -DHPL_USE_TIMES # # When UpCase is defined, CRAY_BLAS redefines the BLAS routines used in # HPL to be prefixed with an S. In the Cray programming environment, the # default INTEGER and REAL size is 64 bits. 
This is reflected in the # Cray Scientific Library as well, so SGEMM is the 64-bit matrix multi- # ply. # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O3 # LINKER = f77 LINKFLAGS = -O3,unroll2,pipeline2 # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Tru64_FBLAS0000644000000000000000000002126211256503657013713 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local/mpi MPinc = -I$(MPdir)/include -I$(MPdir)/alpha/ch_p4/include MPlib = $(MPdir)/alpha/ch_p4/lib/libmpich.a # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lcxml # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 # LINKER = f77 LINKFLAGS = -nofor_main -O5 -arch host -tune host # ARCHIVER = ar ARFLAGS = r RANLIB = ranlib # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Tru64_FBLAS_MPI0000644000000000000000000002121411256503657014415 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lcxml # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpicc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -O2 # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpicc LINKFLAGS = # ARCHIVER = ar ARFLAGS = r RANLIB = echo # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.Tru64_FBLAS_elan0000644000000000000000000002113211256503657014706 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = MPinc = MPlib = -lmpi -lelan # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lcxml # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = cc CCNOOPT = $(HPL_DEFS) CCFLAGS = $(HPL_DEFS) -arch host -tune host -std -O5 # LINKER = f77 LINKFLAGS = -nofor_main -O5 -arch host -tune host # ARCHIVER = ar ARFLAGS = r RANLIB = ranlib # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.UNKNOWN.in0000644000000000000000000002112111256503657013616 00000000000000# # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2004 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = @SHELL@ # CD = @CD@ CP = @CP@ LN_S = @LN_S@ MKDIR = @MKDIR@ RM = @RM@ TOUCH = @TOUCH@ # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = @MPDIR@ MPinc = @MPINC@ MPlib = @MPLIB@ # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = @LADIR@ LAinc = @LAINC@ LAlib = @LALIB@ # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = @F2CDEFS@ # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = @CC@ CCNOOPT = $(HPL_DEFS) @CCNOOPT@ CCFLAGS = $(HPL_DEFS) @CCFLAGS@ # LINKER = @LINKER@ LINKFLAGS = @LINKFLAGS@ # ARCHIVER = @ARCHIVER@ ARFLAGS = @ARFLAGS@ RANLIB = @RANLIB@ # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/Make.cygwin0000644000000000000000000002140211256503657013354 00000000000000# -*- Makefile -*- # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. 
Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # ---------------------------------------------------------------------- # - shell -------------------------------------------------------------- # ---------------------------------------------------------------------- # SHELL = /bin/sh # CD = cd CP = cp LN_S = ln -s MKDIR = mkdir RM = /bin/rm -f TOUCH = touch # # ---------------------------------------------------------------------- # - Platform identifier ------------------------------------------------ # ---------------------------------------------------------------------- # ARCH = $(arch) # # ---------------------------------------------------------------------- # - HPL Directory Structure / HPL library ------------------------------ # ---------------------------------------------------------------------- # TOPdir = ../../.. INCdir = $(TOPdir)/include BINdir = $(TOPdir)/bin/$(ARCH) LIBdir = $(TOPdir)/lib/$(ARCH) # HPLlib = $(LIBdir)/libhpl.a # # ---------------------------------------------------------------------- # - Message Passing library (MPI) -------------------------------------- # ---------------------------------------------------------------------- # MPinc tells the C compiler where to find the Message Passing library # header files, MPlib is defined to be the name of the library to be # used. The variable MPdir is only used for defining MPinc and MPlib. # MPdir = /usr/local MPinc = MPlib = # # ---------------------------------------------------------------------- # - Linear Algebra library (BLAS or VSIPL) ----------------------------- # ---------------------------------------------------------------------- # LAinc tells the C compiler where to find the Linear Algebra library # header files, LAlib is defined to be the name of the library to be # used. The variable LAdir is only used for defining LAinc and LAlib. 
# LAdir = LAinc = LAlib = -lblas # # ---------------------------------------------------------------------- # - F77 / C interface -------------------------------------------------- # ---------------------------------------------------------------------- # You can skip this section if and only if you are not planning to use # a BLAS library featuring a Fortran 77 interface. Otherwise, it is # necessary to fill out the F2CDEFS variable with the appropriate # options. **One and only one** option should be chosen in **each** of # the 3 following categories: # # 1) name space (How C calls a Fortran 77 routine) # # -DAdd_ : all lower case and a suffixed underscore (Suns, # Intel, ...), [default] # -DNoChange : all lower case (IBM RS6000), # -DUpCase : all upper case (Cray), # -DAdd__ : the FORTRAN compiler in use is f2c. # # 2) C and Fortran 77 integer mapping # # -DF77_INTEGER=int : Fortran 77 INTEGER is a C int, [default] # -DF77_INTEGER=long : Fortran 77 INTEGER is a C long, # -DF77_INTEGER=short : Fortran 77 INTEGER is a C short. # # 3) Fortran 77 string handling # # -DStringSunStyle : The string address is passed at the string loca- # tion on the stack, and the string length is then # passed as an F77_INTEGER after all explicit # stack arguments, [default] # -DStringStructPtr : The address of a structure is passed by a # Fortran 77 string, and the structure is of the # form: struct {char *cp; F77_INTEGER len;}, # -DStringStructVal : A structure is passed by value for each Fortran # 77 string, and the structure is of the form: # struct {char *cp; F77_INTEGER len;}, # -DStringCrayStyle : Special option for Cray machines, which uses # Cray fcd (fortran character descriptor) for # interoperation. 
# F2CDEFS = -DAdd_ -DF77_INTEGER=int -DStringSunStyle # # ---------------------------------------------------------------------- # - HPL includes / libraries / specifics ------------------------------- # ---------------------------------------------------------------------- # HPL_INCLUDES = -I$(INCdir) -I$(INCdir)/$(ARCH) $(LAinc) $(MPinc) HPL_LIBS = $(HPLlib) $(LAlib) $(MPlib) -lm # # - Compile time options ----------------------------------------------- # # -DHPL_COPY_L force the copy of the panel L before bcast; # -DHPL_CALL_CBLAS call the cblas interface; # -DHPL_CALL_VSIPL call the vsip library; # -DHPL_DETAILED_TIMING enable detailed timers; # # By default HPL will: # *) not copy L before broadcast, # *) call the BLAS Fortran 77 interface, # *) not display detailed timing information. # HPL_OPTS = # # ---------------------------------------------------------------------- # HPL_DEFS = $(F2CDEFS) $(HPL_OPTS) $(HPL_INCLUDES) # # ---------------------------------------------------------------------- # - Compilers / linkers - Optimization flags --------------------------- # ---------------------------------------------------------------------- # CC = mpicc CCNOOPT = $(HPL_DEFS) -W -Wall -pipe -pedantic CCFLAGS = $(CCNOOPT) -march=pentium3 -fomit-frame-pointer -O3 -funroll-loops # # On some platforms, it is necessary to use the Fortran linker to find # the Fortran internals used in the BLAS library. # LINKER = mpicc LINKFLAGS = $(CCFLAGS) # ARCHIVER = ar ARFLAGS = rc RANLIB = ranlib # # ---------------------------------------------------------------------- hpcc-1.4.1/hpl/setup/make_generic0000644000000000000000000001042211256503657013611 00000000000000#!/bin/sh # # # -- High Performance Computing Linpack Benchmark (HPL) # HPL - 2.0 - September 10, 2008 # Antoine P. 
Petitet # University of Tennessee, Knoxville # Innovative Computing Laboratory # (C) Copyright 2000-2008 All Rights Reserved # # -- Copyright notice and Licensing terms: # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # 1. Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # 2. Redistributions in binary form must reproduce the above copyright # notice, this list of conditions, and the following disclaimer in the # documentation and/or other materials provided with the distribution. # # 3. All advertising materials mentioning features or use of this # software must display the following acknowledgement: # This product includes software developed at the University of # Tennessee, Knoxville, Innovative Computing Laboratory. # # 4. The name of the University, the name of the Laboratory, or the # names of its contributors may not be used to endorse or promote # products derived from this software without specific written # permission. # # -- Disclaimer: # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
# ###################################################################### # # # Configure script to create Make.UNKNOWN from Make.UNKNOWN.in for the # HPL distribution, so users without a real Unix system can have a gene- # ric Make.UNKNOWN to edit for their needs. This script substitutes # pathless version of all the system programs, and commonly used options # values into Make.UNKNOWN.in. # ######################################################################## # sed -e 's%@SHELL@%/bin/sh%' \ -e 's%@CD@%cd%' \ -e 's%@CP@%cp%' \ -e 's%@LN_S@%ln -s%' \ -e 's%@MKDIR@%mkdir%' \ -e 's%@RM@%/bin/rm -f%' \ -e 's%@TOUCH@%touch%' \ -e 's%@ARCH@%UNKNOWN%' \ -e 's%@CC@%mpicc%' \ -e 's%@CCNOOPT@%%' \ -e 's%@CCFLAGS@%%' \ -e 's%@LINKER@%mpif77%' \ -e 's%@LINKFLAGS@%%' \ -e 's%@ARCHIVER@%ar%' \ -e 's%@ARFLAGS@%r%' \ -e 's%@RANLIB@%echo%' \ -e 's%@MPDIR@%%' \ -e 's%@MPINC@%%' \ -e 's%@MPLIB@%%' \ -e 's%@F2CDEFS@%-DAdd_ -DF77_INTEGER=int -DStringSunStyle%' \ -e 's%@LADIR@%%' \ -e 's%@LAINC@%%' \ -e 's%@LALIB@%-lblas%' \ Make.UNKNOWN.in > Make.UNKNOWN # ######################################################################## hpcc-1.4.1/hpl/src/auxil/HPL_abort.c0000644000000000000000000001235411256503657014012 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_abort ( int LINE, const char * SRNAME, const char * FORM, ... ) #else void HPL_abort( va_alist ) va_dcl #endif { /* * Purpose * ======= * * HPL_abort displays an error message on stderr and halts execution. * * * Arguments * ========= * * LINE (local input) int * On entry, LINE specifies the line number in the file where * the error has occured. 
When LINE is not a positive line * number, it is ignored. * * SRNAME (local input) const char * * On entry, SRNAME should be the name of the routine calling * this error handler. * * FORM (local input) const char * * On entry, FORM specifies the format, i.e., how the subsequent * arguments are converted for output. * * (local input) ... * On entry, ... is the list of arguments to be printed within * the format string. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ va_list argptr; char cline[128]; #ifndef HPL_STDC_HEADERS int LINE; char * FORM, * SRNAME; #endif /* .. * .. Executable Statements .. */ #ifdef HPL_STDC_HEADERS va_start( argptr, FORM ); #else va_start( argptr ); LINE = va_arg( argptr, int ); SRNAME = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif (void) vsprintf( cline, FORM, argptr ); va_end( argptr ); /* * Display an error message */ if( LINE <= 0 ) HPL_fprintf( stderr, "%s %s:\n>>> %s <<< Abort ...\n\n", "HPL ERROR in function", SRNAME, cline ); else HPL_fprintf( stderr, "%s %d %s %s:\n>>> %s <<< Abort ...\n\n", "HPL ERROR on line", LINE, "of function", SRNAME, cline ); exit( 0 ); /* * End of HPL_abort */ } hpcc-1.4.1/hpl/src/auxil/HPL_dlacpy.c0000644000000000000000000003463711256503657014167 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * HPL_dlacpy: copy an M-by-N column-major array A into an array B.
 */
/*
 * Include files -- the project header is only needed for the HPL_dcopy
 * prototype used by the HPL_LACPY_USE_COPY variant.
 */
#ifdef HPL_LACPY_USE_COPY
#include "hpl.h"
#endif
/*
 * Define default value for unrolling factors
 * #ifndef HPL_LACPY_M_DEPTH
 * #define    HPL_LACPY_M_DEPTH         32
 * #define    HPL_LACPY_LOG2_M_DEPTH     5
 * #endif
 * #ifndef HPL_LACPY_N_DEPTH
 * #define    HPL_LACPY_N_DEPTH          4
 * #define    HPL_LACPY_LOG2_N_DEPTH     2
 * #endif
 */
#ifndef HPL_LACPY_M_DEPTH
#define    HPL_LACPY_M_DEPTH          4
#define    HPL_LACPY_LOG2_M_DEPTH     2
#endif
#ifndef HPL_LACPY_N_DEPTH
#define    HPL_LACPY_N_DEPTH          2
#define    HPL_LACPY_LOG2_N_DEPTH     1
#endif
/*
 * The hand-unrolled variant only provides column pointers for a column
 * unrolling factor of 1, 2 or 4: reject anything else explicitly.
 */
#ifndef HPL_LACPY_USE_COPY
#if ( HPL_LACPY_N_DEPTH != 1 ) && ( HPL_LACPY_N_DEPTH != 2 ) && \
    ( HPL_LACPY_N_DEPTH != 4 )
#error "HPL_LACPY_N_DEPTH must be 1, 2 or 4"
#endif
#endif

#if defined( HPL_STDC_HEADERS ) || defined( __STDC__ )
void HPL_dlacpy
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         B,
   const int                        LDB
)
#else
void HPL_dlacpy
( M, N, A, LDA, B, LDB )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         B;
   const int                        LDB;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlacpy copies an array A into an array B.  Both arrays are stored
 * column by column; nothing is done when M or N is not positive.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the number of rows of the arrays A and
 *         B. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry,  N specifies  the number of columns of the arrays A
 *         and B. N must be at least zero.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,N).
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * B       (local output)                double *
 *         On entry, B points to an array of dimension (LDB,N). On exit,
 *         B is overwritten with A.
 *
 * LDB     (local input)                 const int
 *         On entry, LDB specifies the leading dimension of the array B.
 *         LDB must be at least MAX(1,M).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#ifdef HPL_LACPY_USE_COPY
/*
 * Column cursors for the HPL_dcopy variant (they were previously only
 * declared for the unrolled variant, so this variant could not compile).
 */
   const double               * A0 = A;
   double                     * B0 = B;
   register int               j;
#else
#if   ( HPL_LACPY_N_DEPTH ==  1 )
   const double               * A0 = A;
   double                     * B0 = B;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
   const double               * A0 = A,              * A1 = A +     LDA;
   double                     * B0 = B,              * B1 = B +     LDB;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
   const double               * A0 = A,              * A1 = A +     LDA,
                              * A2 = A + (LDA << 1), * A3 = A + 3 * LDA;
   double                     * B0 = B,              * B1 = B +     LDB,
                              * B2 = B + (LDB << 1), * B3 = B + 3 * LDB;
#endif
/*
 * incA / incB  : jump from the end of one group of HPL_LACPY_N_DEPTH
 *                columns (M entries consumed) to the start of the next;
 * incA0/ incB0 : same jump for a single clean-up column.
 */
   const int                  incA = ( (unsigned int)(LDA) <<
                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
                              incB = ( (unsigned int)(LDB) <<
                                       HPL_LACPY_LOG2_N_DEPTH ) - M,
                              incA0 = (unsigned int)(LDA) - M,
                              incB0 = (unsigned int)(LDB) - M;
   int                        mu, nu;
   register int               i, j;
#endif
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;

#ifdef HPL_LACPY_USE_COPY
   for( j = 0; j < N; j++, A0 += LDA, B0 += LDB )
      HPL_dcopy( M, A0, 1, B0, 1 );
#else
/*
 * mu, nu: M and N rounded down to a multiple of the unrolling depths
 */
   mu = (int)( ( (unsigned int)(M) >> HPL_LACPY_LOG2_M_DEPTH ) <<
                                      HPL_LACPY_LOG2_M_DEPTH );
   nu = (int)( ( (unsigned int)(N) >> HPL_LACPY_LOG2_N_DEPTH ) <<
                                      HPL_LACPY_LOG2_N_DEPTH );
/*
 * Groups of HPL_LACPY_N_DEPTH columns
 */
   for( j = 0; j < nu; j += HPL_LACPY_N_DEPTH )
   {
      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH )
      {
#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 0] = A0[ 0];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 0] = A0[ 0]; B1[ 0] = A1[ 0]; B2[ 0] = A2[ 0]; B3[ 0] = A3[ 0];
#endif

#if ( HPL_LACPY_M_DEPTH >  1 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 1] = A0[ 1];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 1] = A0[ 1]; B1[ 1] = A1[ 1]; B2[ 1] = A2[ 1]; B3[ 1] = A3[ 1];
#endif

#endif

#if ( HPL_LACPY_M_DEPTH >  2 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 2] = A0[ 2]; B1[ 2] = A1[ 2]; B2[ 2] = A2[ 2]; B3[ 2] = A3[ 2];
         B0[ 3] = A0[ 3]; B1[ 3] = A1[ 3]; B2[ 3] = A2[ 3]; B3[ 3] = A3[ 3];
#endif

#endif

#if ( HPL_LACPY_M_DEPTH >  4 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5];
         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 4] = A0[ 4]; B1[ 4] = A1[ 4]; B2[ 4] = A2[ 4]; B3[ 4] = A3[ 4];
         B0[ 5] = A0[ 5]; B1[ 5] = A1[ 5]; B2[ 5] = A2[ 5]; B3[ 5] = A3[ 5];
         B0[ 6] = A0[ 6]; B1[ 6] = A1[ 6]; B2[ 6] = A2[ 6]; B3[ 6] = A3[ 6];
         B0[ 7] = A0[ 7]; B1[ 7] = A1[ 7]; B2[ 7] = A2[ 7]; B3[ 7] = A3[ 7];
#endif

#endif

#if ( HPL_LACPY_M_DEPTH >  8 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9];
         B0[10] = A0[10]; B1[10] = A1[10]; B0[11] = A0[11]; B1[11] = A1[11];
         B0[12] = A0[12]; B1[12] = A1[12]; B0[13] = A0[13]; B1[13] = A1[13];
         B0[14] = A0[14]; B1[14] = A1[14]; B0[15] = A0[15]; B1[15] = A1[15];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[ 8] = A0[ 8]; B1[ 8] = A1[ 8]; B2[ 8] = A2[ 8]; B3[ 8] = A3[ 8];
         B0[ 9] = A0[ 9]; B1[ 9] = A1[ 9]; B2[ 9] = A2[ 9]; B3[ 9] = A3[ 9];
         B0[10] = A0[10]; B1[10] = A1[10]; B2[10] = A2[10]; B3[10] = A3[10];
         B0[11] = A0[11]; B1[11] = A1[11]; B2[11] = A2[11]; B3[11] = A3[11];
         B0[12] = A0[12]; B1[12] = A1[12]; B2[12] = A2[12]; B3[12] = A3[12];
         B0[13] = A0[13]; B1[13] = A1[13]; B2[13] = A2[13]; B3[13] = A3[13];
         B0[14] = A0[14]; B1[14] = A1[14]; B2[14] = A2[14]; B3[14] = A3[14];
         B0[15] = A0[15]; B1[15] = A1[15]; B2[15] = A2[15]; B3[15] = A3[15];
#endif

#endif

#if ( HPL_LACPY_M_DEPTH > 16 )

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         B0[16] = A0[16]; B1[16] = A1[16]; B0[17] = A0[17]; B1[17] = A1[17];
         B0[18] = A0[18]; B1[18] = A1[18]; B0[19] = A0[19]; B1[19] = A1[19];
         B0[20] = A0[20]; B1[20] = A1[20]; B0[21] = A0[21]; B1[21] = A1[21];
         B0[22] = A0[22]; B1[22] = A1[22]; B0[23] = A0[23]; B1[23] = A1[23];
         B0[24] = A0[24]; B1[24] = A1[24]; B0[25] = A0[25]; B1[25] = A1[25];
         B0[26] = A0[26]; B1[26] = A1[26]; B0[27] = A0[27]; B1[27] = A1[27];
         B0[28] = A0[28]; B1[28] = A1[28]; B0[29] = A0[29]; B1[29] = A1[29];
         B0[30] = A0[30]; B1[30] = A1[30]; B0[31] = A0[31]; B1[31] = A1[31];
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         B0[16] = A0[16]; B1[16] = A1[16]; B2[16] = A2[16]; B3[16] = A3[16];
         B0[17] = A0[17]; B1[17] = A1[17]; B2[17] = A2[17]; B3[17] = A3[17];
         B0[18] = A0[18]; B1[18] = A1[18]; B2[18] = A2[18]; B3[18] = A3[18];
         B0[19] = A0[19]; B1[19] = A1[19]; B2[19] = A2[19]; B3[19] = A3[19];
         B0[20] = A0[20]; B1[20] = A1[20]; B2[20] = A2[20]; B3[20] = A3[20];
         B0[21] = A0[21]; B1[21] = A1[21]; B2[21] = A2[21]; B3[21] = A3[21];
         B0[22] = A0[22]; B1[22] = A1[22]; B2[22] = A2[22]; B3[22] = A3[22];
         B0[23] = A0[23]; B1[23] = A1[23]; B2[23] = A2[23]; B3[23] = A3[23];
         B0[24] = A0[24]; B1[24] = A1[24]; B2[24] = A2[24]; B3[24] = A3[24];
         B0[25] = A0[25]; B1[25] = A1[25]; B2[25] = A2[25]; B3[25] = A3[25];
         B0[26] = A0[26]; B1[26] = A1[26]; B2[26] = A2[26]; B3[26] = A3[26];
         B0[27] = A0[27]; B1[27] = A1[27]; B2[27] = A2[27]; B3[27] = A3[27];
         B0[28] = A0[28]; B1[28] = A1[28]; B2[28] = A2[28]; B3[28] = A3[28];
         B0[29] = A0[29]; B1[29] = A1[29]; B2[29] = A2[29]; B3[29] = A3[29];
         B0[30] = A0[30]; B1[30] = A1[30]; B2[30] = A2[30]; B3[30] = A3[30];
         B0[31] = A0[31]; B1[31] = A1[31]; B2[31] = A2[31]; B3[31] = A3[31];
#endif

#endif

#if   ( HPL_LACPY_N_DEPTH ==  1 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         A0 += HPL_LACPY_M_DEPTH; B0 += HPL_LACPY_M_DEPTH;
         A1 += HPL_LACPY_M_DEPTH; B1 += HPL_LACPY_M_DEPTH;
         A2 += HPL_LACPY_M_DEPTH; B2 += HPL_LACPY_M_DEPTH;
         A3 += HPL_LACPY_M_DEPTH; B3 += HPL_LACPY_M_DEPTH;
#endif
      }
/*
 * Remaining M - mu rows of the current group of columns
 */
      for( i = mu; i < M; i++ )
      {
#if   ( HPL_LACPY_N_DEPTH ==  1 )
         *B0 = *A0; B0++; A0++;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
         *B0 = *A0; B0++; A0++; *B1 = *A1; B1++; A1++;
         *B2 = *A2; B2++; A2++; *B3 = *A3; B3++; A3++;
#endif
      }
/*
 * Move every column cursor to the next group of columns
 */
#if   ( HPL_LACPY_N_DEPTH ==  1 )
      A0 += incA; B0 += incB;
#elif ( HPL_LACPY_N_DEPTH ==  2 )
      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
#elif ( HPL_LACPY_N_DEPTH ==  4 )
      A0 += incA; B0 += incB; A1 += incA; B1 += incB;
      A2 += incA; B2 += incB; A3 += incA; B3 += incB;
#endif
   }
/*
 * Remaining N - nu columns, one at a time
 */
   for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 )
   {
      for( i = 0; i < mu; i += HPL_LACPY_M_DEPTH,
           B0 += HPL_LACPY_M_DEPTH, A0 += HPL_LACPY_M_DEPTH )
      {
         B0[ 0] = A0[ 0];
#if ( HPL_LACPY_M_DEPTH >  1 )
         B0[ 1] = A0[ 1];
#endif
#if ( HPL_LACPY_M_DEPTH >  2 )
         B0[ 2] = A0[ 2]; B0[ 3] = A0[ 3];
#endif
#if ( HPL_LACPY_M_DEPTH >  4 )
         B0[ 4] = A0[ 4]; B0[ 5] = A0[ 5]; B0[ 6] = A0[ 6]; B0[ 7] = A0[ 7];
#endif
#if ( HPL_LACPY_M_DEPTH >  8 )
         B0[ 8] = A0[ 8]; B0[ 9] = A0[ 9]; B0[10] = A0[10]; B0[11] = A0[11];
         B0[12] = A0[12]; B0[13] = A0[13]; B0[14] = A0[14]; B0[15] = A0[15];
#endif
#if ( HPL_LACPY_M_DEPTH > 16 )
         B0[16] = A0[16]; B0[17] = A0[17]; B0[18] = A0[18]; B0[19] = A0[19];
         B0[20] = A0[20]; B0[21] = A0[21]; B0[22] = A0[22]; B0[23] = A0[23];
         B0[24] = A0[24]; B0[25] = A0[25]; B0[26] = A0[26]; B0[27] = A0[27];
         B0[28] = A0[28]; B0[29] = A0[29]; B0[30] = A0[30]; B0[31] = A0[31];
#endif
      }

      for( i = mu; i < M; i++, B0++, A0++ ) { *B0 = *A0; }
   }
#endif
/*
 * End of HPL_dlacpy
 */
}
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * --------------------------------------------------------------------- * Static function prototypes * --------------------------------------------------------------------- */ static void HPL_dlamc1 STDC_ARGS( ( int *, int *, int *, int * ) ); static void HPL_dlamc2 STDC_ARGS( ( int *, int *, int *, double *, int *, double *, int *, double * ) ); static double HPL_dlamc3 STDC_ARGS( ( const double, const double ) ); static void HPL_dlamc4 STDC_ARGS( ( int *, const double, const int ) ); static void HPL_dlamc5 STDC_ARGS( ( const int, const int, const int, const int, int *, double * ) ); static double HPL_dipow STDC_ARGS( ( const double, const int ) ); #ifdef HPL_STDC_HEADERS double HPL_dlamch ( const HPL_T_MACH CMACH ) #else double HPL_dlamch ( CMACH ) const HPL_T_MACH CMACH; #endif { /* * Purpose * ======= * * HPL_dlamch determines machine-specific arithmetic constants such as * the relative machine precision (eps), the safe minimum (sfmin) such * that 1 / sfmin does not overflow, the base of the machine (base), the * precision (prec), the number of (base) digits in the mantissa (t), * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the * minimum exponent before (gradual) underflow (emin), the underflow * threshold (rmin) base**(emin-1), the largest exponent before overflow * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamch.f (version 2.0 -- 1992), that was itself * based on the function ENVRON by Malcolm and incorporated suggestions * by Gentleman and Marovich. See * * Malcolm M. A., Algorithms to reveal properties of floating-point * arithmetic., Comms. of the ACM, 15, 949-951 (1972). * * Gentleman W. M. and Marovich S. 
B., More on algorithms that reveal * properties of floating point arithmetic units., Comms. of the ACM, * 17, 276-277 (1974). * * Arguments * ========= * * CMACH (local input) const HPL_T_MACH * Specifies the value to be returned by HPL_dlamch * = HPL_MACH_EPS, HPL_dlamch := eps (default) * = HPL_MACH_SFMIN, HPL_dlamch := sfmin * = HPL_MACH_BASE, HPL_dlamch := base * = HPL_MACH_PREC, HPL_dlamch := eps*base * = HPL_MACH_MLEN, HPL_dlamch := t * = HPL_MACH_RND, HPL_dlamch := rnd * = HPL_MACH_EMIN, HPL_dlamch := emin * = HPL_MACH_RMIN, HPL_dlamch := rmin * = HPL_MACH_EMAX, HPL_dlamch := emax * = HPL_MACH_RMAX, HPL_dlamch := rmax * * where * * eps = relative machine precision, * sfmin = safe minimum, * base = base of the machine, * prec = eps*base, * t = number of digits in the mantissa, * rnd = 1.0 if rounding occurs in addition, * emin = minimum exponent before underflow, * rmin = underflow threshold, * emax = largest exponent before overflow, * rmax = overflow threshold. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ static double eps, sfmin, base, t, rnd, emin, rmin, emax, rmax, prec; double small; static int first=1; int beta=0, imax=0, imin=0, it=0, lrnd=0; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; HPL_dlamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); base = (double)(beta); t = (double)(it); if( lrnd != 0 ) { rnd = HPL_rone; eps = HPL_dipow( base, 1 - it ) / HPL_rtwo; } else { rnd = HPL_rzero; eps = HPL_dipow( base, 1 - it ); } prec = eps * base; emin = (double)(imin); emax = (double)(imax); sfmin = rmin; small = HPL_rone / rmax; /* * Use SMALL plus a bit, to avoid the possibility of rounding causing * overflow when computing 1/sfmin. 
*/ if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); } if( CMACH == HPL_MACH_EPS ) return( eps ); if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); if( CMACH == HPL_MACH_BASE ) return( base ); if( CMACH == HPL_MACH_PREC ) return( prec ); if( CMACH == HPL_MACH_MLEN ) return( t ); if( CMACH == HPL_MACH_RND ) return( rnd ); if( CMACH == HPL_MACH_EMIN ) return( emin ); if( CMACH == HPL_MACH_RMIN ) return( rmin ); if( CMACH == HPL_MACH_EMAX ) return( emax ); if( CMACH == HPL_MACH_RMAX ) return( rmax ); return( eps ); /* * End of HPL_dlamch */ } #ifdef HPL_STDC_HEADERS static void HPL_dlamc1 ( int * BETA, int * T, int * RND, int * IEEE1 ) #else static void HPL_dlamc1 ( BETA, T, RND, IEEE1 ) /* * .. Scalar Arguments .. */ int * BETA, * IEEE1, * RND, * T; #endif { /* * Purpose * ======= * * HPL_dlamc1 determines the machine parameters given by BETA, T, RND, * and IEEE1. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamc1.f (version 2.0 -- 1992), that was itself * based on the function ENVRON by Malcolm and incorporated suggestions * by Gentleman and Marovich. See * * Malcolm M. A., Algorithms to reveal properties of floating-point * arithmetic., Comms. of the ACM, 15, 949-951 (1972). * * Gentleman W. M. and Marovich S. B., More on algorithms that reveal * properties of floating point arithmetic units., Comms. of the ACM, * 17, 276-277 (1974). * * Arguments * ========= * * BETA (local output) int * * The base of the machine. * * T (local output) int * * The number of ( BETA ) digits in the mantissa. * * RND (local output) int * * Specifies whether proper rounding (RND=1) or chopping (RND=0) * occurs in addition. This may not be a reliable guide to the * way in which the machine performs its arithmetic. * * IEEE1 (local output) int * * Specifies whether rounding appears to be done in the IEEE * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double a, b, c, f, one, qtr, savec, t1, t2; static int first=1, lbeta, lieee1, lrnd, lt; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; one = HPL_rone; /* * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and * RND. Throughout this routine we use the function HPL_dlamc3 to ensure * that relevant values are stored and not held in registers, or are not * affected by optimizers. * * Compute a = 2.0**m with the smallest positive integer m such that * fl( a + 1.0 ) == a. */ a = HPL_rone; c = HPL_rone; do { a *= HPL_rtwo; c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } while( c == HPL_rone ); /* * Now compute b = 2.0**m with the smallest positive integer m such that * fl( a + b ) > a. */ b = HPL_rone; c = HPL_dlamc3( a, b ); while( c == a ) { b *= HPL_rtwo; c = HPL_dlamc3( a, b ); } /* * Now compute the base. a and c are neighbouring floating point num- * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to * BETA and not (BETA-1). */ qtr = one / 4.0; savec = c; c = HPL_dlamc3( c, -a ); lbeta = (int)(c+qtr); /* * Now determine whether rounding or chopping occurs, by adding a bit * less than BETA/2 and a bit more than BETA/2 to a. */ b = (double)(lbeta); f = HPL_dlamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_dlamc3( f, a ); if( c == a ) { lrnd = 1; } else { lrnd = 0; } f = HPL_dlamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_dlamc3( f, a ); if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; /* * Try and decide whether rounding is done in the IEEE round to nea- * rest style. b/2 is half a unit in the last place of the two numbers * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 * to savec should change savec. 
*/ t1 = HPL_dlamc3( b / HPL_rtwo, a ); t2 = HPL_dlamc3( b / HPL_rtwo, savec ); if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; else lieee1 = 0; /* * Now find the mantissa, T. It should be the integer part of log to the * base BETA of a, however it is safer to determine T by powering. So we * find T as the smallest positive integer for which fl( beta**t + 1.0 ) * is equal to 1.0. */ lt = 0; a = HPL_rone; c = HPL_rone; do { lt++; a *= (double)(lbeta); c = HPL_dlamc3( a, one ); c = HPL_dlamc3( c, -a ); } while( c == HPL_rone ); } *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; } #ifdef HPL_STDC_HEADERS static void HPL_dlamc2 ( int * BETA, int * T, int * RND, double * EPS, int * EMIN, double * RMIN, int * EMAX, double * RMAX ) #else static void HPL_dlamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) /* * .. Scalar Arguments .. */ int * BETA, * EMAX, * EMIN, * RND, * T; double * EPS, * RMAX, * RMIN; #endif { /* * Purpose * ======= * * HPL_dlamc2 determines the machine parameters specified in its argu- * ment list. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamc2.f (version 2.0 -- 1992), that was itself * based on a function PARANOIA by W. Kahan of the University of Cali- * fornia at Berkeley for the computation of the relative machine epsi- * lon eps. * * Arguments * ========= * * BETA (local output) int * * The base of the machine. * * T (local output) int * * The number of ( BETA ) digits in the mantissa. * * RND (local output) int * * Specifies whether proper rounding (RND=1) or chopping (RND=0) * occurs in addition. This may not be a reliable guide to the * way in which the machine performs its arithmetic. * * EPS (local output) double * * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, * where fl denotes the computed value. * * EMIN (local output) int * * The minimum exponent before (gradual) underflow occurs. 
* * RMIN (local output) double * * The smallest normalized number for the machine, given by * BASE**( EMIN - 1 ), where BASE is the floating point value * of BETA. * * EMAX (local output) int * * The maximum exponent before overflow occurs. * * RMAX (local output) double * * The largest positive number for the machine, given by * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point * value of BETA. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ static double leps, lrmax, lrmin; double a, b, c, half, one, rbase, sixth, small, third, two, zero; static int first=1, iwarn=0, lbeta=0, lemax, lemin, lt=0; int gnmin=0, gpmin=0, i, ieee, lieee1=0, lrnd=0, ngnmin=0, ngpmin=0; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; /* * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, * T, RND, EPS, EMIN and RMIN. * * Throughout this routine we use the function HPL_dlamc3 to ensure that * relevant values are stored and not held in registers, or are not af- * fected by optimizers. * * HPL_dlamc1 returns the parameters lbeta, lt, lrnd and lieee1. */ HPL_dlamc1( &lbeta, <, &lrnd, &lieee1 ); /* * Start to find eps. */ b = (double)(lbeta); a = HPL_dipow( b, -lt ); leps = a; /* * Try some tricks to see whether or not this is the correct EPS. */ b = two / 3.0; half = one / HPL_rtwo; sixth = HPL_dlamc3( b, -half ); third = HPL_dlamc3( sixth, sixth ); b = HPL_dlamc3( third, -half ); b = HPL_dlamc3( b, sixth ); b = Mabs( b ); if( b < leps ) b = leps; leps = HPL_rone; while( ( leps > b ) && ( b > zero ) ) { leps = b; c = HPL_dlamc3( half * leps, HPL_dipow( two, 5 ) * HPL_dipow( leps, 2 ) ); c = HPL_dlamc3( half, -c ); b = HPL_dlamc3( half, c ); c = HPL_dlamc3( half, -b ); b = HPL_dlamc3( half, c ); } if( a < leps ) leps = a; /* * Computation of EPS complete. * * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). 
Keep * dividing a by BETA until (gradual) underflow occurs. This is detected * when we cannot recover the previous a. */ rbase = one / (double)(lbeta); small = one; for( i = 0; i < 3; i++ ) small = HPL_dlamc3( small * rbase, zero ); a = HPL_dlamc3( one, small ); HPL_dlamc4( &ngpmin, one, lbeta ); HPL_dlamc4( &ngnmin, -one, lbeta ); HPL_dlamc4( &gpmin, a, lbeta ); HPL_dlamc4( &gnmin, -a, lbeta ); ieee = 0; if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) { if( ngpmin == gpmin ) { /* * Non twos-complement machines, no gradual underflow; e.g., VAX ) */ lemin = ngpmin; } else if( ( gpmin-ngpmin ) == 3 ) { /* * Non twos-complement machines with gradual underflow; e.g., IEEE stan- * dard followers */ lemin = ngpmin - 1 + lt; ieee = 1; } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, gpmin ); iwarn = 1; } } else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) { if( Mabs( ngpmin-ngnmin ) == 1 ) { /* * Twos-complement machines, no gradual underflow; e.g., CYBER 205 */ lemin = Mmax( ngpmin, ngnmin ); } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); iwarn = 1; } } else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) { if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) { /* * Twos-complement machines with gradual underflow; no known machine */ lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); iwarn = 1; } } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); lemin = Mmin( lemin, gnmin ); iwarn = 1; } /* * Comment out this if block if EMIN is ok */ if( iwarn != 0 ) { first = 1; HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", "WARNING. The value EMIN may be incorrect:- EMIN =", lemin, "If, after inspection, the value EMIN looks acceptable, please comment ", "out the if block as marked within the code of routine HPL_dlamc2, ", "otherwise supply EMIN explicitly." 
); } /* * Assume IEEE arithmetic if we found denormalised numbers above, or if * arithmetic seems to round in the IEEE style, determined in routine * HPL_dlamc1. A true IEEE machine should have both things true; how- * ever, faulty machines may have one or the other. */ if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; else ieee = 0; /* * Compute RMIN by successive division by BETA. We could compute RMIN * as BASE**( EMIN - 1 ), but some machines underflow during this compu- * tation. */ lrmin = HPL_rone; for( i = 0; i < 1 - lemin; i++ ) lrmin = HPL_dlamc3( lrmin*rbase, zero ); /* * Finally, call HPL_dlamc5 to compute emax and rmax. */ HPL_dlamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); } *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; } #ifdef HPL_STDC_HEADERS static double HPL_dlamc3( const double A, const double B ) #else static double HPL_dlamc3( A, B ) /* * .. Scalar Arguments .. */ const double A, B; #endif { /* * Purpose * ======= * * HPL_dlamc3 is intended to force a and b to be stored prior to doing * the addition of a and b, for use in situations where optimizers * might hold one of these in a register. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamc3.f (version 2.0 -- 1992). * * Arguments * ========= * * A, B (local input) double * The values a and b. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. */ return( A + B ); } #ifdef HPL_STDC_HEADERS static void HPL_dlamc4 ( int * EMIN, const double START, const int BASE ) #else static void HPL_dlamc4( EMIN, START, BASE ) /* * .. Scalar Arguments .. */ int * EMIN; const int BASE; const double START; #endif { /* * Purpose * ======= * * HPL_dlamc4 is a service function for HPL_dlamc2. 
* * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamc4.f (version 2.0 -- 1992). * * Arguments * ========= * * EMIN (local output) int * * The minimum exponent before (gradual) underflow, computed by * setting A = START and dividing by BASE until the previous A * can not be recovered. * * START (local input) double * The starting point for determining EMIN. * * BASE (local input) int * The base of the machine. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double a, b1, b2, c1, c2, d1, d2, one, rbase, zero; int i; /* .. * .. Executable Statements .. */ a = START; one = HPL_rone; rbase = one / (double)(BASE); zero = HPL_rzero; *EMIN = 1; b1 = HPL_dlamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; do { (*EMIN)--; a = b1; b1 = HPL_dlamc3( a / BASE, zero ); c1 = HPL_dlamc3( b1 * BASE, zero ); d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; b2 = HPL_dlamc3( a * rbase, zero ); c2 = HPL_dlamc3( b2 / rbase, zero ); d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); } #ifdef HPL_STDC_HEADERS static void HPL_dlamc5 ( const int BETA, const int P, const int EMIN, const int IEEE, int * EMAX, double * RMAX ) #else static void HPL_dlamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) /* * .. Scalar Arguments .. */ const int BETA, EMIN, IEEE, P; int * EMAX; double * RMAX; #endif { /* * Purpose * ======= * * HPL_dlamc5 attempts to compute RMAX, the largest machine floating- * point number, without overflow. It assumes that EMAX + abs(EMIN) sum * approximately to a power of 2. It will fail on machines where this * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, * EMAX = 28718). It will also fail if the value supplied for EMIN is * too large (i.e. too close to zero), probably with overflow. 
* * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function dlamc5.f (version 2.0 -- 1992). * * Arguments * ========= * * BETA (local input) int * The base of floating-point arithmetic. * * P (local input) int * The number of base BETA digits in the mantissa of a floating- * point value. * * EMIN (local input) int * The minimum exponent before (gradual) underflow. * * IEEE (local input) int * A logical flag specifying whether or not the arithmetic sys- * tem is thought to comply with the IEEE standard. * * EMAX (local output) int * * The largest exponent before overflow. * * RMAX (local output) double * * The largest machine floating-point number. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double oldy=HPL_rzero, recbas, y, z; int exbits=1, expsum, i, lexp=1, nbits, try_, uexp; /* .. * .. Executable Statements .. */ /* * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). * We then assume that EMAX + abs( EMIN ) will sum approximately to the * bound that is closest to abs( EMIN ). (EMAX is the exponent of the * required number RMAX). */ l_10: try_ = (int)( (unsigned int)(lexp) << 1 ); if( try_ <= ( -EMIN ) ) { lexp = try_; exbits++; goto l_10; } if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try_; exbits++; } /* * Now -lexp is less than or equal to EMIN, and -uexp is greater than or * equal to EMIN. exbits is the number of bits needed to store the expo- * nent. */ if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) { expsum = (int)( (unsigned int)(lexp) << 1 ); } else { expsum = (int)( (unsigned int)(uexp) << 1 ); } /* * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. */ *EMAX = expsum + EMIN - 1; /* * nbits is the total number of bits needed to store a floating-point * number. 
*/ nbits = 1 + exbits + P; if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) { /* * Either there are an odd number of bits used to store a floating-point * number, which is unlikely, or some bits are not used in the represen- * tation of numbers, which is possible, (e.g. Cray machines) or the * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), * which is perhaps the most likely. We have to assume the last alterna- * tive. If this is true, then we need to reduce EMAX by one because * there must be some way of representing zero in an implicit-bit sys- * tem. On machines like Cray we are reducing EMAX by one unnecessarily. */ (*EMAX)--; } if( IEEE != 0 ) { /* * Assume we are on an IEEE machine which reserves one exponent for in- * finity and NaN. */ (*EMAX)--; } /* * Now create RMAX, the largest machine number, which should be equal to * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being * careful that the result is less than 1.0. */ recbas = HPL_rone / (double)(BETA); z = (double)(BETA) - HPL_rone; y = HPL_rzero; for( i = 0; i < P; i++ ) { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_dlamc3( y, z ); } if( y >= HPL_rone ) y = oldy; /* * Now multiply by BETA**EMAX to get RMAX. */ for( i = 0; i < *EMAX; i++ ) y = HPL_dlamc3( y * BETA, HPL_rzero ); *RMAX = y; /* * End of HPL_dlamch */ } #ifdef HPL_STDC_HEADERS static double HPL_dipow ( const double X, const int N ) #else static double HPL_dipow( X, N ) /* * .. Scalar Arguments .. */ const int N; const double X; #endif { /* * Purpose * ======= * * HPL_dipow computes the integer n-th power of a real scalar x. * * Arguments * ========= * * X (local input) const double * The real scalar x. * * N (local input) const int * The integer power to raise x to. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double r, y=HPL_rone; int k, n; /* .. * .. Executable Statements .. 
*/ if( X == HPL_rzero ) return( HPL_rzero ); if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } for( k = 0; k < n; k++ ) y *= r; return( y ); } hpcc-1.4.1/hpl/src/auxil/HPL_dlange.c0000644000000000000000000001553411256503657014140 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS double HPL_dlange ( const HPL_T_NORM NORM, const int M, const int N, const double * A, const int LDA ) #else double HPL_dlange ( NORM, M, N, A, LDA ) const HPL_T_NORM NORM; const int M; const int N; const double * A; const int LDA; #endif { /* * Purpose * ======= * * HPL_dlange returns the value of the one norm, or the infinity norm, * or the element of largest absolute value of a matrix A: * * max(abs(A(i,j))) when NORM = HPL_NORM_A, * norm1(A), when NORM = HPL_NORM_1, * normI(A), when NORM = HPL_NORM_I, * * where norm1 denotes the one norm of a matrix (maximum column sum) and * normI denotes the infinity norm of a matrix (maximum row sum). Note * that max(abs(A(i,j))) is not a matrix norm. * * Arguments * ========= * * NORM (local input) const HPL_T_NORM * On entry, NORM specifies the value to be returned by this * function as described above. * * M (local input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (local input) const double * * On entry, A points to an array of dimension (LDA,N), that * contains the matrix A. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. 
* LDA must be at least max(1,M). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double s, v0=HPL_rzero, * work = NULL; int i, j; /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return( HPL_rzero ); if( NORM == HPL_NORM_A ) { /* * max( abs( A ) ) */ for( j = 0; j < N; j++ ) { for( i = 0; i < M; i++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } A += LDA - M; } } else if( NORM == HPL_NORM_1 ) { /* * Find norm_1( A ). */ work = (double*)malloc( (size_t)(N) * sizeof( double ) ); if( work == NULL ) { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } else { for( j = 0; j < N; j++ ) { s = HPL_rzero; for( i = 0; i < M; i++ ) { s += Mabs( *A ); A++; } work[j] = s; A += LDA - M; } /* * Find maximum sum of columns for 1-norm */ v0 = work[HPL_idamax( N, work, 1 )]; v0 = Mabs( v0 ); if( work ) free( work ); } } else if( NORM == HPL_NORM_I ) { /* * Find norm_inf( A ) */ work = (double*)malloc( (size_t)(M) * sizeof( double ) ); if( work == NULL ) { HPL_abort( __LINE__, "HPL_dlange", "Memory allocation failed" ); } else { for( i = 0; i < M; i++ ) { work[i] = HPL_rzero; } for( j = 0; j < N; j++ ) { for( i = 0; i < M; i++ ) { work[i] += Mabs( *A ); A++; } A += LDA - M; } /* * Find maximum sum of rows for inf-norm */ v0 = work[HPL_idamax( M, work, 1 )]; v0 = Mabs( v0 ); if( work ) free( work ); } } return( v0 ); /* * End of HPL_dlange */ } hpcc-1.4.1/hpl/src/auxil/HPL_dlaprnt.c0000644000000000000000000001252611256503657014350 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_dlaprnt ( const int M, const int N, double * A, const int IA, const int JA, const int LDA, const char * CMATNM ) #else void HPL_dlaprnt ( M, N, A, IA, JA, LDA, CMATNM ) const int M; const int N; double * A; const int IA; const int JA; const int LDA; const char * CMATNM; #endif { /* * Purpose * ======= * * HPL_dlaprnt prints to standard error an M-by-N matrix A. * * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of A. M must be at * least zero. * * N (local input) const int * On entry, N specifies the number of columns of A. N must be * at least zero. * * A (local input) double * * On entry, A points to an array of dimension (LDA,N). * * IA (local input) const int * On entry, IA specifies the starting row index to be printed. * * JA (local input) const int * On entry, JA specifies the starting column index to be * printed. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max(1,M). * * CMATNM (local input) const char * * On entry, CMATNM is the name of the matrix to be printed. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int i, j; /* .. * .. Executable Statements .. */ for( j = 0; j < N; j++ ) { for( i = 0; i < M; i++ ) { HPL_fprintf( stderr, "%s(%6d,%6d)=%30.18f\n", CMATNM, IA+i, JA+j, *(Mptr( A, i, j, LDA )) ); } } /* * End of HPL_dlaprnt */ } hpcc-1.4.1/hpl/src/auxil/HPL_dlatcpy.c0000644000000000000000000004125611256503657014346 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factors * #ifndef HPL_LATCPY_M_DEPTH * #define HPL_LATCPY_M_DEPTH 32 * #define HPL_LATCPY_LOG2_M_DEPTH 5 * #endif * #ifndef HPL_LATCPY_N_DEPTH * #define HPL_LATCPY_N_DEPTH 4 * #define HPL_LATCPY_LOG2_N_DEPTH 2 * #endif */ #ifndef HPL_LATCPY_M_DEPTH #define HPL_LATCPY_M_DEPTH 4 #define HPL_LATCPY_LOG2_M_DEPTH 2 #endif #ifndef HPL_LATCPY_N_DEPTH #define HPL_LATCPY_N_DEPTH 2 #define HPL_LATCPY_LOG2_N_DEPTH 1 #endif #ifdef HPL_STDC_HEADERS void HPL_dlatcpy ( const int M, const int N, const double * A, const int LDA, double * B, const int LDB ) #else void HPL_dlatcpy ( M, N, A, LDA, B, LDB ) const int M; const int N; const double * A; const int LDA; double * B; const int LDB; #endif { /* * Purpose * ======= * * HPL_dlatcpy copies the transpose of an array A into an array B. * * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of the array B and * the number of columns of A. M must be at least zero. * * N (local input) const int * On entry, N specifies the number of rows of the array A and * the number of columns of B. N must be at least zero. * * A (local input) const double * * On entry, A points to an array of dimension (LDA,M). * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,N). * * B (local output) double * * On entry, B points to an array of dimension (LDB,N). On exit, * B is overwritten with the transpose of A. * * LDB (local input) const int * On entry, LDB specifies the leading dimension of the array B. * LDB must be at least MAX(1,M). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ #ifdef HPL_LATCPY_USE_COPY register int j; #else #if ( HPL_LATCPY_N_DEPTH == 1 ) const double * A0 = A; double * B0 = B; #elif ( HPL_LATCPY_N_DEPTH == 2 ) const double * A0 = A, * A1 = A + 1; double * B0 = B, * B1 = B + LDB; #elif ( HPL_LATCPY_N_DEPTH == 4 ) const double * A0 = A, * A1 = A + 1, * A2 = A + 2, * A3 = A + 3; double * B0 = B, * B1 = B + LDB, * B2 = B + (LDB << 1), * B3 = B + 3 * LDB; #endif const int incA = -M * LDA + (1 << HPL_LATCPY_LOG2_N_DEPTH), incB = ( (unsigned int)(LDB) << HPL_LATCPY_LOG2_N_DEPTH ) - M, incA0 = -M * LDA + 1, incB0 = LDB - M; int mu, nu; register int i, j; #endif /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; #ifdef HPL_LATCPY_USE_COPY for( j = 0; j < N; j++, B0 += LDB ) HPL_dcopy( M, A0+j, LDA, B0, 1 ); #else mu = (int)( ( (unsigned int)(M) >> HPL_LATCPY_LOG2_M_DEPTH ) << HPL_LATCPY_LOG2_M_DEPTH ); nu = (int)( ( (unsigned int)(N) >> HPL_LATCPY_LOG2_N_DEPTH ) << HPL_LATCPY_LOG2_N_DEPTH ); for( j = 0; j < nu; j += HPL_LATCPY_N_DEPTH ) { for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH ) { #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[ 0] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[ 0] = *A0; A0 += LDA; B1[ 0] = *A1; A1 += LDA; B2[ 0] = *A2; A2 += LDA; B3[ 0] = *A3; A3 += LDA; #endif #if ( HPL_LATCPY_M_DEPTH > 1 ) #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[ 1] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[ 1] = *A0; A0 += LDA; B1[ 1] = *A1; A1 += LDA; B2[ 1] = *A2; A2 += LDA; B3[ 1] = *A3; A3 += LDA; #endif #endif #if ( HPL_LATCPY_M_DEPTH > 2 ) #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[ 2] = *A0; A0 += LDA; B0[ 3] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += LDA; B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[ 2] = *A0; A0 += LDA; B1[ 2] = *A1; A1 += 
LDA; B2[ 2] = *A2; A2 += LDA; B3[ 2] = *A3; A3 += LDA; B0[ 3] = *A0; A0 += LDA; B1[ 3] = *A1; A1 += LDA; B2[ 3] = *A2; A2 += LDA; B3[ 3] = *A3; A3 += LDA; #endif #endif #if ( HPL_LATCPY_M_DEPTH > 4 ) #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[ 4] = *A0; A0 += LDA; B0[ 5] = *A0; A0 += LDA; B0[ 6] = *A0; A0 += LDA; B0[ 7] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[ 4] = *A0; A0 += LDA; B1[ 4] = *A1; A1 += LDA; B2[ 4] = *A2; A2 += LDA; B3[ 4] = *A3; A3 += LDA; B0[ 5] = *A0; A0 += LDA; B1[ 5] = *A1; A1 += LDA; B2[ 5] = *A2; A2 += LDA; B3[ 5] = *A3; A3 += LDA; B0[ 6] = *A0; A0 += LDA; B1[ 6] = *A1; A1 += LDA; B2[ 6] = *A2; A2 += LDA; B3[ 6] = *A3; A3 += LDA; B0[ 7] = *A0; A0 += LDA; B1[ 7] = *A1; A1 += LDA; B2[ 7] = *A2; A2 += LDA; B3[ 7] = *A3; A3 += LDA; #endif #endif #if ( HPL_LATCPY_M_DEPTH > 8 ) #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[ 8] = *A0; A0 += LDA; B0[ 9] = *A0; A0 += LDA; B0[10] = *A0; A0 += LDA; B0[11] = *A0; A0 += LDA; B0[12] = *A0; A0 += LDA; B0[13] = *A0; A0 += LDA; B0[14] = *A0; A0 += LDA; B0[15] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[ 8] = *A0; A0 += LDA; B1[ 8] = *A1; A1 += LDA; B2[ 8] = *A2; A2 += LDA; B3[ 8] = *A3; A3 += LDA; B0[ 9] = *A0; A0 += LDA; B1[ 9] = *A1; A1 += LDA; B2[ 9] = *A2; A2 += LDA; B3[ 9] = *A3; A3 += LDA; B0[10] = *A0; A0 += LDA; B1[10] = *A1; A1 += LDA; 
B2[10] = *A2; A2 += LDA; B3[10] = *A3; A3 += LDA; B0[11] = *A0; A0 += LDA; B1[11] = *A1; A1 += LDA; B2[11] = *A2; A2 += LDA; B3[11] = *A3; A3 += LDA; B0[12] = *A0; A0 += LDA; B1[12] = *A1; A1 += LDA; B2[12] = *A2; A2 += LDA; B3[12] = *A3; A3 += LDA; B0[13] = *A0; A0 += LDA; B1[13] = *A1; A1 += LDA; B2[13] = *A2; A2 += LDA; B3[13] = *A3; A3 += LDA; B0[14] = *A0; A0 += LDA; B1[14] = *A1; A1 += LDA; B2[14] = *A2; A2 += LDA; B3[14] = *A3; A3 += LDA; B0[15] = *A0; A0 += LDA; B1[15] = *A1; A1 += LDA; B2[15] = *A2; A2 += LDA; B3[15] = *A3; A3 += LDA; #endif #endif #if ( HPL_LATCPY_M_DEPTH > 16 ) #if ( HPL_LATCPY_N_DEPTH == 1 ) B0[16] = *A0; A0 += LDA; B0[17] = *A0; A0 += LDA; B0[18] = *A0; A0 += LDA; B0[19] = *A0; A0 += LDA; B0[20] = *A0; A0 += LDA; B0[21] = *A0; A0 += LDA; B0[22] = *A0; A0 += LDA; B0[23] = *A0; A0 += LDA; B0[24] = *A0; A0 += LDA; B0[25] = *A0; A0 += LDA; B0[26] = *A0; A0 += LDA; B0[27] = *A0; A0 += LDA; B0[28] = *A0; A0 += LDA; B0[29] = *A0; A0 += LDA; B0[30] = *A0; A0 += LDA; B0[31] = *A0; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA; B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0[16] = *A0; A0 += LDA; B1[16] = *A1; A1 += LDA; B2[16] = *A2; A2 += LDA; B3[16] = *A3; A3 += LDA; 
B0[17] = *A0; A0 += LDA; B1[17] = *A1; A1 += LDA; B2[17] = *A2; A2 += LDA; B3[17] = *A3; A3 += LDA; B0[18] = *A0; A0 += LDA; B1[18] = *A1; A1 += LDA; B2[18] = *A2; A2 += LDA; B3[18] = *A3; A3 += LDA; B0[19] = *A0; A0 += LDA; B1[19] = *A1; A1 += LDA; B2[19] = *A2; A2 += LDA; B3[19] = *A3; A3 += LDA; B0[20] = *A0; A0 += LDA; B1[20] = *A1; A1 += LDA; B2[20] = *A2; A2 += LDA; B3[20] = *A3; A3 += LDA; B0[21] = *A0; A0 += LDA; B1[21] = *A1; A1 += LDA; B2[21] = *A2; A2 += LDA; B3[21] = *A3; A3 += LDA; B0[22] = *A0; A0 += LDA; B1[22] = *A1; A1 += LDA; B2[22] = *A2; A2 += LDA; B3[22] = *A3; A3 += LDA; B0[23] = *A0; A0 += LDA; B1[23] = *A1; A1 += LDA; B2[23] = *A2; A2 += LDA; B3[23] = *A3; A3 += LDA; B0[24] = *A0; A0 += LDA; B1[24] = *A1; A1 += LDA; B2[24] = *A2; A2 += LDA; B3[24] = *A3; A3 += LDA; B0[25] = *A0; A0 += LDA; B1[25] = *A1; A1 += LDA; B2[25] = *A2; A2 += LDA; B3[25] = *A3; A3 += LDA; B0[26] = *A0; A0 += LDA; B1[26] = *A1; A1 += LDA; B2[26] = *A2; A2 += LDA; B3[26] = *A3; A3 += LDA; B0[27] = *A0; A0 += LDA; B1[27] = *A1; A1 += LDA; B2[27] = *A2; A2 += LDA; B3[27] = *A3; A3 += LDA; B0[28] = *A0; A0 += LDA; B1[28] = *A1; A1 += LDA; B2[28] = *A2; A2 += LDA; B3[28] = *A3; A3 += LDA; B0[29] = *A0; A0 += LDA; B1[29] = *A1; A1 += LDA; B2[29] = *A2; A2 += LDA; B3[29] = *A3; A3 += LDA; B0[30] = *A0; A0 += LDA; B1[30] = *A1; A1 += LDA; B2[30] = *A2; A2 += LDA; B3[30] = *A3; A3 += LDA; B0[31] = *A0; A0 += LDA; B1[31] = *A1; A1 += LDA; B2[31] = *A2; A2 += LDA; B3[31] = *A3; A3 += LDA; #endif #endif #if ( HPL_LATCPY_N_DEPTH == 1 ) B0 += HPL_LATCPY_M_DEPTH; #elif ( HPL_LATCPY_N_DEPTH == 2 ) B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH; #elif ( HPL_LATCPY_N_DEPTH == 4 ) B0 += HPL_LATCPY_M_DEPTH; B1 += HPL_LATCPY_M_DEPTH; B2 += HPL_LATCPY_M_DEPTH; B3 += HPL_LATCPY_M_DEPTH; #endif } for( i = mu; i < M; i++ ) { #if ( HPL_LATCPY_N_DEPTH == 1 ) *B0 = *A0; B0++; A0 += LDA; #elif ( HPL_LATCPY_N_DEPTH == 2 ) *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; #elif ( 
HPL_LATCPY_N_DEPTH == 4 ) *B0 = *A0; B0++; A0 += LDA; *B1 = *A1; B1++; A1 += LDA; *B2 = *A2; B2++; A2 += LDA; *B3 = *A3; B3++; A3 += LDA; #endif } #if ( HPL_LATCPY_N_DEPTH == 1 ) A0 += incA; B0 += incB; #elif ( HPL_LATCPY_N_DEPTH == 2 ) A0 += incA; A1 += incA; B0 += incB; B1 += incB; #elif ( HPL_LATCPY_N_DEPTH == 4 ) A0 += incA; A1 += incA; A2 += incA; A3 += incA; B0 += incB; B1 += incB; B2 += incB; B3 += incB; #endif } for( j = nu; j < N; j++, B0 += incB0, A0 += incA0 ) { for( i = 0; i < mu; i += HPL_LATCPY_M_DEPTH, B0 += HPL_LATCPY_M_DEPTH ) { B0[ 0]=*A0; A0 += LDA; #if ( HPL_LATCPY_M_DEPTH > 1 ) B0[ 1]=*A0; A0 += LDA; #endif #if ( HPL_LATCPY_M_DEPTH > 2 ) B0[ 2]=*A0; A0 += LDA; B0[ 3]=*A0; A0 += LDA; #endif #if ( HPL_LATCPY_M_DEPTH > 4 ) B0[ 4]=*A0; A0 += LDA; B0[ 5]=*A0; A0 += LDA; B0[ 6]=*A0; A0 += LDA; B0[ 7]=*A0; A0 += LDA; #endif #if ( HPL_LATCPY_M_DEPTH > 8 ) B0[ 8]=*A0; A0 += LDA; B0[ 9]=*A0; A0 += LDA; B0[10]=*A0; A0 += LDA; B0[11]=*A0; A0 += LDA; B0[12]=*A0; A0 += LDA; B0[13]=*A0; A0 += LDA; B0[14]=*A0; A0 += LDA; B0[15]=*A0; A0 += LDA; #endif #if ( HPL_LATCPY_M_DEPTH > 16 ) B0[16]=*A0; A0 += LDA; B0[17]=*A0; A0 += LDA; B0[18]=*A0; A0 += LDA; B0[19]=*A0; A0 += LDA; B0[20]=*A0; A0 += LDA; B0[21]=*A0; A0 += LDA; B0[22]=*A0; A0 += LDA; B0[23]=*A0; A0 += LDA; B0[24]=*A0; A0 += LDA; B0[25]=*A0; A0 += LDA; B0[26]=*A0; A0 += LDA; B0[27]=*A0; A0 += LDA; B0[28]=*A0; A0 += LDA; B0[29]=*A0; A0 += LDA; B0[30]=*A0; A0 += LDA; B0[31]=*A0; A0 += LDA; #endif } for( i = mu; i < M; i++, B0++, A0 += LDA ) { *B0 = *A0; } } #endif /* * End of HPL_dlatcpy */ } hpcc-1.4.1/hpl/src/auxil/HPL_fprintf.c0000644000000000000000000001115211256503657014346 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_fprintf ( FILE * STREAM, const char * FORM, ... ) #else void HPL_fprintf( va_alist ) va_dcl #endif { /* * Purpose * ======= * * HPL_fprintf is a wrapper around fprintf flushing the output stream. * * * Arguments * ========= * * STREAM (local input) FILE * * On entry, STREAM specifies the output stream. * * FORM (local input) const char * * On entry, FORM specifies the format, i.e., how the subsequent * arguments are converted for output. * * (local input) ... * On entry, ... is the list of arguments to be printed within * the format string. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ va_list argptr; char cline[256]; #ifndef HPL_STDC_HEADERS FILE * STREAM; char * FORM; #endif /* .. * .. Executable Statements .. */ #ifdef HPL_STDC_HEADERS va_start( argptr, FORM ); #else va_start( argptr ); STREAM = va_arg( argptr, FILE * ); FORM = va_arg( argptr, char * ); #endif (void) vsprintf( cline, FORM, argptr ); va_end( argptr ); (void) fprintf( STREAM, "%s", cline ); (void) fflush( STREAM ); /* * End of HPL_fprintf */ } hpcc-1.4.1/hpl/src/auxil/HPL_warn.c0000644000000000000000000001261011256503657013645 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_warn ( FILE * STREAM, int LINE, const char * SRNAME, const char * FORM, ... ) #else void HPL_warn( va_alist ) va_dcl #endif { /* * Purpose * ======= * * HPL_warn displays an error message. * * * Arguments * ========= * * STREAM (local input) FILE * * On entry, STREAM specifies the output stream. 
* * LINE (local input) int * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a positive line * number, it is ignored. * * SRNAME (local input) const char * * On entry, SRNAME should be the name of the routine calling * this error handler. * * FORM (local input) const char * * On entry, FORM specifies the format, i.e., how the subsequent * arguments are converted for output. * * (local input) ... * On entry, ... is the list of arguments to be printed within * the format string. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ va_list argptr; char cline[128]; #ifndef HPL_STDC_HEADERS FILE * STREAM; int LINE; char * FORM, * SRNAME; #endif /* .. * .. Executable Statements .. */ #ifdef HPL_STDC_HEADERS va_start( argptr, FORM ); #else va_start( argptr ); STREAM = va_arg( argptr, FILE * ); LINE = va_arg( argptr, int ); SRNAME = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif (void) vsprintf( cline, FORM, argptr ); va_end( argptr ); /* * Display an error message */ if( LINE <= 0 ) HPL_fprintf( STREAM, "%s %s:\n>>> %s <<<\n\n", "HPL ERROR in function", SRNAME, cline ); else HPL_fprintf( STREAM, "%s %d %s %s:\n>>> %s <<<\n\n", "HPL ERROR on line", LINE, "of function", SRNAME, cline ); /* * End of HPL_warn */ } hpcc-1.4.1/hpl/src/blas/HPL_daxpy.c0000644000000000000000000001346211256503657013630 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_daxpy #ifdef HPL_STDC_HEADERS void HPL_daxpy ( const int N, const double ALPHA, const double * X, const int INCX, double * Y, const int INCY ) #else void HPL_daxpy ( N, ALPHA, X, INCX, Y, INCY ) const int N; const double ALPHA; const double * X; const int INCX; double * Y; const int INCY; #endif { /* * Purpose * ======= * * HPL_daxpy scales the vector x by alpha and adds it to y. * * * Arguments * ========= * * N (local input) const int * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * ALPHA (local input) const double * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero, then the entries of the incremented array X * need not be set on input. * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (local input/output) double * * On entry, Y is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. * On exit, the entries of the incremented array Y are updated * with the scaled entries of the incremented array X. * * INCY (local input) const int * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. 
* * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_daxpy( N, ALPHA, X, INCX, Y, INCY ); #endif #ifdef HPL_CALL_VSIPL register const double alpha = ALPHA; register double x0, x1, x2, x3, y0, y1, y2, y3; const double * StX; register int i; int nu; const int incX2 = 2 * INCX, incY2 = 2 * INCY, incX3 = 3 * INCX, incY3 = 3 * INCY, incX4 = 4 * INCX, incY4 = 4 * INCY; if( ( N > 0 ) && ( alpha != HPL_rzero ) ) { if( ( nu = ( N >> 2 ) << 2 ) != 0 ) { StX = X + nu * INCX; do { x0 = (*X); y0 = (*Y); x1 = X[INCX ]; y1 = Y[INCY ]; x2 = X[incX2]; y2 = Y[incY2]; x3 = X[incX3]; y3 = Y[incY3]; *Y = y0 + alpha * x0; Y[INCY ] = y1 + alpha * x1; Y[incY2] = y2 + alpha * x2; Y[incY3] = y3 + alpha * x3; X += incX4; Y += incY4; } while( X != StX ); } for( i = N - nu; i != 0; i-- ) { x0 = (*X); y0 = (*Y); *Y = y0 + alpha * x0; X += INCX; Y += INCY; } } #endif #ifdef HPL_CALL_FBLAS double alpha = ALPHA; #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; #else #define F77N N #define F77incx INCX #define F77incy INCY #endif F77daxpy( &F77N, &alpha, X, &F77incx, Y, &F77incy ); #endif /* * End of HPL_daxpy */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_dcopy.c0000644000000000000000000001502011256503657013611 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dcopy #ifdef HPL_STDC_HEADERS void HPL_dcopy ( const int N, const double * X, const int INCX, double * Y, const int INCY ) #else void HPL_dcopy ( N, X, INCX, Y, INCY ) const int N; const double * X; const int INCX; double * Y; const int INCY; #endif { /* * Purpose * ======= * * HPL_dcopy copies the vector x into the vector y. 
* * * Arguments * ========= * * N (local input) const int * On entry, N specifies the length of the vectors x and y. N * must be at least zero. * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (local input/output) double * * On entry, Y is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. * On exit, the entries of the incremented array Y are updated * with the entries of the incremented array X. * * INCY (local input) const int * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dcopy( N, X, INCX, Y, INCY ); #endif #ifdef HPL_CALL_VSIPL register double x0, x1, x2, x3, x4, x5, x6, x7; const double * StX; register int i; int nu; const int incX2 = 2 * INCX, incY2 = 2 * INCY, incX3 = 3 * INCX, incY3 = 3 * INCY, incX4 = 4 * INCX, incY4 = 4 * INCY, incX5 = 5 * INCX, incY5 = 5 * INCY, incX6 = 6 * INCX, incY6 = 6 * INCY, incX7 = 7 * INCX, incY7 = 7 * INCY, incX8 = 8 * INCX, incY8 = 8 * INCY; if( N > 0 ) { if( ( nu = ( N >> 3 ) << 3 ) != 0 ) { StX = X + nu * INCX; do { x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; *Y = x0; Y[incY4] = x4; Y[INCY ] = x1; Y[incY5] = x5; Y[incY2] = x2; Y[incY6] = x6; Y[incY3] = x3; Y[incY7] = x7; X += incX8; Y += incY8; } while( X != StX ); } for( i = N - nu; i != 0; i-- ) { x0 = (*X); *Y = x0; X += INCX; Y += INCY; } } #endif #ifdef HPL_CALL_FBLAS #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77N = N, F77incx = INCX, F77incy = INCY; #else #define F77N N #define F77incx INCX #define F77incy INCY #endif F77dcopy( &F77N, X, &F77incx, Y, &F77incy ); 
#endif /* * End of HPL_dcopy */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_dgemm.c0000644000000000000000000004605411256503657013577 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dgemm #ifdef HPL_CALL_VSIPL #ifdef HPL_STDC_HEADERS static void HPL_dgemmNN ( const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else static void HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const int K, LDA, LDB, LDC, M, N; const double ALPHA, BETA; const double * A, * B; double * C; #endif { register double t0; int i, iail, iblj, icij, j, jal, jbj, jcj, l; for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) { HPL_dscal( M, BETA, C+jcj, 1 ); for( l = 0, jal = 0, iblj = jbj; l < K; l++, jal += LDA, iblj += 1 ) { t0 = ALPHA * B[iblj]; for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) { C[icij] += A[iail] * t0; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dgemmNT ( const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else static void HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const int K, LDA, LDB, LDC, M, N; const double ALPHA, BETA; const double * A, * B; double * C; #endif { register double t0; int i, iail, ibj, ibjl, icij, j, jal, jcj, l; for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) { HPL_dscal( M, 
BETA, C+jcj, 1 ); for( l = 0, jal = 0, ibjl = ibj; l < K; l++, jal += LDA, ibjl += LDB ) { t0 = ALPHA * B[ibjl]; for( i = 0, iail = jal, icij = jcj; i < M; i++, iail += 1, icij += 1 ) { C[icij] += A[iail] * t0; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dgemmTN ( const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else static void HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const int K, LDA, LDB, LDC, M, N; const double ALPHA, BETA; const double * A, * B; double * C; #endif { register double t0; int i, iai, iail, iblj, icij, j, jbj, jcj, l; for( j = 0, jbj = 0, jcj = 0; j < N; j++, jbj += LDB, jcj += LDC ) { for( i = 0, icij = jcj, iai = 0; i < M; i++, icij += 1, iai += LDA ) { t0 = HPL_rzero; for( l = 0, iail = iai, iblj = jbj; l < K; l++, iail += 1, iblj += 1 ) { t0 += A[iail] * B[iblj]; } if( BETA == HPL_rzero ) C[icij] = HPL_rzero; else C[icij] *= BETA; C[icij] += ALPHA * t0; } } } #ifdef HPL_STDC_HEADERS static void HPL_dgemmTT ( const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else static void HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const int K, LDA, LDB, LDC, M, N; const double ALPHA, BETA; const double * A, * B; double * C; #endif { register double t0; int i, iali, ibj, ibjl, icij, j, jai, jcj, l; for( j = 0, ibj = 0, jcj = 0; j < N; j++, ibj += 1, jcj += LDC ) { for( i = 0, icij = jcj, jai = 0; i < M; i++, icij += 1, jai += LDA ) { t0 = HPL_rzero; for( l = 0, iali = jai, ibjl = ibj; l < K; l++, iali += 1, ibjl += LDB ) t0 += A[iali] * B[ibjl]; if( BETA == HPL_rzero ) C[icij] = HPL_rzero; else C[icij] *= BETA; C[icij] += ALPHA * t0; } } } #ifdef HPL_STDC_HEADERS static void HPL_dgemm0 ( const enum HPL_TRANS TRANSA, const enum HPL_TRANS TRANSB, const int M, const int N, const 
int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else static void HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const enum HPL_TRANS TRANSA, TRANSB; const int K, LDA, LDB, LDC, M, N; const double ALPHA, BETA; const double * A, * B; double * C; #endif { int i, j; if( ( M == 0 ) || ( N == 0 ) || ( ( ( ALPHA == HPL_rzero ) || ( K == 0 ) ) && ( BETA == HPL_rone ) ) ) return; if( ALPHA == HPL_rzero ) { for( j = 0; j < N; j++ ) { for( i = 0; i < M; i++ ) *(C+i+j*LDC) = HPL_rzero; } return; } if( TRANSB == HplNoTrans ) { if( TRANSA == HplNoTrans ) { HPL_dgemmNN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } else { HPL_dgemmTN( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } } else { if( TRANSA == HplNoTrans ) { HPL_dgemmNT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } else { HPL_dgemmTT( M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } } } #endif #ifdef HPL_STDC_HEADERS void HPL_dgemm ( const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANSA, const enum HPL_TRANS TRANSB, const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC ) #else void HPL_dgemm ( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ) const enum HPL_ORDER ORDER; const enum HPL_TRANS TRANSA; const enum HPL_TRANS TRANSB; const int M; const int N; const int K; const double ALPHA; const double * A; const int LDA; const double * B; const int LDB; const double BETA; double * C; const int LDC; #endif { /* * Purpose * ======= * * HPL_dgemm performs one of the matrix-matrix operations * * C := alpha * op( A ) * op( B ) + beta * C * * where op( X ) is one of * * op( X ) = X or op( X ) = X^T. * * Alpha and beta are scalars, and A, B and C are matrices, with op(A) * an m by k matrix, op(B) a k by n matrix and C an m by n matrix. 
* * Arguments * ========= * * ORDER (local input) const enum HPL_ORDER * On entry, ORDER specifies the storage format of the operands * as follows: * ORDER = HplRowMajor, * ORDER = HplColumnMajor. * * TRANSA (local input) const enum HPL_TRANS * On entry, TRANSA specifies the form of op(A) to be used in * the matrix-matrix operation follows: * TRANSA==HplNoTrans : op( A ) = A, * TRANSA==HplTrans : op( A ) = A^T, * TRANSA==HplConjTrans : op( A ) = A^T. * * TRANSB (local input) const enum HPL_TRANS * On entry, TRANSB specifies the form of op(B) to be used in * the matrix-matrix operation follows: * TRANSB==HplNoTrans : op( B ) = B, * TRANSB==HplTrans : op( B ) = B^T, * TRANSB==HplConjTrans : op( B ) = B^T. * * M (local input) const int * On entry, M specifies the number of rows of the matrix * op(A) and of the matrix C. M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the matrix * op(B) and the number of columns of the matrix C. N must be * at least zero. * * K (local input) const int * On entry, K specifies the number of columns of the matrix * op(A) and the number of rows of the matrix op(B). K must be * be at least zero. * * ALPHA (local input) const double * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the elements of the matrices A and B * need not be set on input. * * A (local input) const double * * On entry, A is an array of dimension (LDA,ka), where ka is * k when TRANSA==HplNoTrans, and is m otherwise. Before * entry with TRANSA==HplNoTrans, the leading m by k part of * the array A must contain the matrix A, otherwise the leading * k by m part of the array A must contain the matrix A. * * LDA (local input) const int * On entry, LDA specifies the first dimension of A as declared * in the calling (sub) program. When TRANSA==HplNoTrans then * LDA must be at least max(1,m), otherwise LDA must be at least * max(1,k). 
* * B (local input) const double * * On entry, B is an array of dimension (LDB,kb), where kb is * n when TRANSB==HplNoTrans, and is k otherwise. Before * entry with TRANSB==HplNoTrans, the leading k by n part of * the array B must contain the matrix B, otherwise the leading * n by k part of the array B must contain the matrix B. * * LDB (local input) const int * On entry, LDB specifies the first dimension of B as declared * in the calling (sub) program. When TRANSB==HplNoTrans then * LDB must be at least max(1,k), otherwise LDB must be at least * max(1,n). * * BETA (local input) const double * On entry, BETA specifies the scalar beta. When BETA is * supplied as zero then the elements of the matrix C need * not be set on input. * * C (local input/output) double * * On entry, C is an array of dimension (LDC,n). Before entry, * the leading m by n part of the array C must contain the * matrix C, except when beta is zero, in which case C need not * be set on entry. On exit, the array C is overwritten by the * m by n matrix ( alpha*op( A )*op( B ) + beta*C ). * * LDC (local input) const int * On entry, LDC specifies the first dimension of C as declared * in the calling (sub) program. LDC must be at least * max(1,m). 
* * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dgemm( ORDER, TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); #endif #ifdef HPL_CALL_VSIPL if( ORDER == HplColumnMajor ) { HPL_dgemm0( TRANSA, TRANSB, M, N, K, ALPHA, A, LDA, B, LDB, BETA, C, LDC ); } else { HPL_dgemm0( TRANSB, TRANSA, N, M, K, ALPHA, B, LDB, A, LDA, BETA, C, LDC ); } #endif #ifdef HPL_CALL_FBLAS double alpha = ALPHA, beta = BETA; #ifdef StringSunStyle #ifdef HPL_USE_F77_INTEGER_DEF F77_INTEGER IONE = 1; #else int IONE = 1; #endif #endif #ifdef StringStructVal F77_CHAR ftransa; F77_CHAR ftransb; #endif #ifdef StringStructPtr F77_CHAR ftransa; F77_CHAR ftransb; #endif #ifdef StringCrayStyle F77_CHAR ftransa; F77_CHAR ftransb; #endif #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77M = M, F77N = N, F77K = K, F77lda = LDA, F77ldb = LDB, F77ldc = LDC; #else #define F77M M #define F77N N #define F77K K #define F77lda LDA #define F77ldb LDB #define F77ldc LDC #endif char ctransa, ctransb; if( TRANSA == HplNoTrans ) ctransa = 'N'; else if( TRANSA == HplTrans ) ctransa = 'T'; else ctransa = 'C'; if( TRANSB == HplNoTrans ) ctransb = 'N'; else if( TRANSB == HplTrans ) ctransb = 'T'; else ctransb = 'C'; if( ORDER == HplColumnMajor ) { #ifdef StringSunStyle F77dgemm( &ctransa, &ctransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, B, &F77ldb, &beta, C, &F77ldc, IONE, IONE ); #endif #ifdef StringCrayStyle ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, B, &F77ldb, &beta, C, &F77ldc ); #endif #ifdef StringStructVal ftransa.len = 1; ftransa.cp = &ctransa; ftransb.len = 1; ftransb.cp = &ctransb; F77dgemm( ftransa, ftransb, &F77M, &F77N, &F77K, &alpha, A, &F77lda, B, &F77ldb, &beta, C, &F77ldc ); #endif #ifdef StringStructPtr ftransa.len = 1; ftransa.cp = &ctransa; ftransb.len = 1; ftransb.cp = &ctransb; F77dgemm( &ftransa, &ftransb, &F77M, &F77N, &F77K, 
&alpha, A, &F77lda, B, &F77ldb, &beta, C, &F77ldc ); #endif } else { #ifdef StringSunStyle F77dgemm( &ctransb, &ctransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, A, &F77lda, &beta, C, &F77ldc, IONE, IONE ); #endif #ifdef StringCrayStyle ftransa = HPL_C2F_CHAR( ctransa ); ftransb = HPL_C2F_CHAR( ctransb ); F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, A, &F77lda, &beta, C, &F77ldc ); #endif #ifdef StringStructVal ftransa.len = 1; ftransa.cp = &ctransa; ftransb.len = 1; ftransb.cp = &ctransb; F77dgemm( ftransb, ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, A, &F77lda, &beta, C, &F77ldc ); #endif #ifdef StringStructPtr ftransa.len = 1; ftransa.cp = &ctransa; ftransb.len = 1; ftransb.cp = &ctransb; F77dgemm( &ftransb, &ftransa, &F77N, &F77M, &F77K, &alpha, B, &F77ldb, A, &F77lda, &beta, C, &F77ldc ); #endif } #endif /* * End of HPL_dgemm */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_dgemv.c0000644000000000000000000002752511256503657013612 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dgemv #ifdef HPL_CALL_VSIPL #ifdef HPL_STDC_HEADERS static void HPL_dgemv0 ( const enum HPL_TRANS TRANS, const int M, const int N, const double ALPHA, const double * A, const int LDA, const double * X, const int INCX, const double BETA, double * Y, const int INCY ) #else static void HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) const enum HPL_TRANS TRANS; const int INCX, INCY, LDA, M, N; const double ALPHA, BETA; const double * A, * X; double * Y; #endif { /* * .. Local Variables .. */ int i, iaij, ix, iy, j, jaj, jx, jy; register double t0; /* .. * .. Executable Statements .. 
*/ if( ( M == 0 ) || ( N == 0 ) || ( ( ALPHA == HPL_rzero ) && ( BETA == HPL_rone ) ) ) return; if( ALPHA == HPL_rzero ) { HPL_dscal( M, BETA, Y, INCY ); return; } if( TRANS == HplNoTrans ) { HPL_dscal( M, BETA, Y, INCY ); for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) { t0 = ALPHA * X[jx]; for( i = 0, iaij = jaj, iy = 0; i < M; i++, iaij += 1, iy += INCY ) { Y[iy] += A[iaij] * t0; } } } else { for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) { t0 = HPL_rzero; for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) { t0 += A[iaij] * X[ix]; } if( BETA == HPL_rzero ) Y[jy] = ALPHA * t0; else Y[jy] = BETA * Y[jy] + ALPHA * t0; } } } #endif #ifdef HPL_STDC_HEADERS void HPL_dgemv ( const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANS, const int M, const int N, const double ALPHA, const double * A, const int LDA, const double * X, const int INCX, const double BETA, double * Y, const int INCY ) #else void HPL_dgemv ( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ) const enum HPL_ORDER ORDER; const enum HPL_TRANS TRANS; const int M; const int N; const double ALPHA; const double * A; const int LDA; const double * X; const int INCX; const double BETA; double * Y; const int INCY; #endif { /* * Purpose * ======= * * HPL_dgemv performs one of the matrix-vector operations * * y := alpha * op( A ) * x + beta * y, * * where op( X ) is one of * * op( X ) = X or op( X ) = X^T. * * where alpha and beta are scalars, x and y are vectors and A is an m * by n matrix. * * Arguments * ========= * * ORDER (local input) const enum HPL_ORDER * On entry, ORDER specifies the storage format of the operands * as follows: * ORDER = HplRowMajor, * ORDER = HplColumnMajor. * * TRANS (local input) const enum HPL_TRANS * On entry, TRANS specifies the operation to be performed as * follows: * TRANS = HplNoTrans y := alpha*A *x + beta*y, * TRANS = HplTrans y := alpha*A^T*x + beta*y. 
* * M (local input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * ALPHA (local input) const double * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then A and X need not be set on input. * * A (local input) const double * * On entry, A points to an array of size equal to or greater * than LDA * n. Before entry, the leading m by n part of the * array A must contain the matrix coefficients. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of A as * declared in the calling (sub) program. LDA must be at * least MAX(1,m). * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * BETA (local input) const double * On entry, BETA specifies the scalar beta. When ALPHA is * supplied as zero then Y need not be set on input. * * Y (local input/output) double * * On entry, Y is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. * Before entry with BETA non-zero, the incremented array Y must * contain the vector y. On exit, Y is overwritten by the * updated vector y. * * INCY (local input) const int * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dgemv( ORDER, TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); #endif #ifdef HPL_CALL_VSIPL if( ORDER == HplColumnMajor ) { HPL_dgemv0( TRANS, M, N, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); } else { HPL_dgemv0( ( TRANS == HplNoTrans ? 
HplTrans : HplNoTrans ), N, M, ALPHA, A, LDA, X, INCX, BETA, Y, INCY ); } #endif #ifdef HPL_CALL_FBLAS double alpha = ALPHA, beta = BETA; #ifdef StringSunStyle #ifdef HPL_USE_F77_INTEGER_DEF F77_INTEGER IONE = 1; #else int IONE = 1; #endif #endif #ifdef StringStructVal F77_CHAR ftran; #endif #ifdef StringStructPtr F77_CHAR ftran; #endif #ifdef StringCrayStyle F77_CHAR ftran; #endif #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77M = M, F77N = N, F77lda = LDA, F77incx = INCX, F77incy = INCY; #else #define F77M M #define F77N N #define F77lda LDA #define F77incx INCX #define F77incy INCY #endif char ctran; if( ORDER == HplColumnMajor ) { ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); #ifdef StringSunStyle F77dgemv( &ctran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy, IONE ); #endif #ifdef StringCrayStyle ftran = HPL_C2F_CHAR( ctran ); F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif #ifdef StringStructVal ftran.len = 1; ftran.cp = &ctran; F77dgemv( ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif #ifdef StringStructPtr ftran.len = 1; ftran.cp = &ctran; F77dgemv( &ftran, &F77M, &F77N, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif } else { ctran = ( TRANS == HplNoTrans ? 
'T' : 'N' ); #ifdef StringSunStyle F77dgemv( &ctran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy, IONE ); #endif #ifdef StringCrayStyle ftran = HPL_C2F_CHAR( ctran ); F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif #ifdef StringStructVal ftran.len = 1; ftran.cp = &ctran; F77dgemv( ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif #ifdef StringStructPtr ftran.len = 1; ftran.cp = &ctran; F77dgemv( &ftran, &F77N, &F77M, &alpha, A, &F77lda, X, &F77incx, &beta, Y, &F77incy ); #endif } #endif /* * End of HPL_dgemv */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_dger.c0000644000000000000000000001771611256503657013432 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dger #ifdef HPL_STDC_HEADERS void HPL_dger ( const enum HPL_ORDER ORDER, const int M, const int N, const double ALPHA, const double * X, const int INCX, double * Y, const int INCY, double * A, const int LDA ) #else void HPL_dger ( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ) const enum HPL_ORDER ORDER; const int M; const int N; const double ALPHA; const double * X; const int INCX; double * Y; const int INCY; double * A; const int LDA; #endif { /* * Purpose * ======= * * HPL_dger performs the rank 1 operation * * A := alpha * x * y^T + A, * * where alpha is a scalar, x is an m-element vector, y is an n-element * vector and A is an m by n matrix. * * Arguments * ========= * * ORDER (local input) const enum HPL_ORDER * On entry, ORDER specifies the storage format of the operands * as follows: * ORDER = HplRowMajor, * ORDER = HplColumnMajor. * * M (local input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the matrix A. 
* N must be at least zero. * * ALPHA (local input) const double * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then X and Y need not be set on input. * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( m - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * Y (local input) double * * On entry, Y is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCY ) ) that contains the vector y. * * INCY (local input) const int * On entry, INCY specifies the increment for the elements of Y. * INCY must not be zero. * * A (local input/output) double * * On entry, A points to an array of size equal to or greater * than LDA * n. Before entry, the leading m by n part of the * array A must contain the matrix coefficients. On exit, A is * overwritten by the updated matrix. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of A as * declared in the calling (sub) program. LDA must be at * least MAX(1,m). 
* * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dger( ORDER, M, N, ALPHA, X, INCX, Y, INCY, A, LDA ); #endif #ifdef HPL_CALL_VSIPL register double t0; int i, iaij, ix, iy, j, jaj, jx, jy; if( ( M == 0 ) || ( N == 0 ) || ( ALPHA == HPL_rzero ) ) return; if( ORDER == HplColumnMajor ) { for( j = 0, jaj = 0, jy = 0; j < N; j++, jaj += LDA, jy += INCY ) { t0 = ALPHA * Y[jy]; for( i = 0, iaij = jaj, ix = 0; i < M; i++, iaij += 1, ix += INCX ) { A[iaij] += X[ix] * t0; } } } else { for( j = 0, jaj = 0, jx = 0; j < M; j++, jaj += LDA, jx += INCX ) { t0 = ALPHA * X[jx]; for( i = 0, iaij = jaj, iy = 0; i < N; i++, iaij += 1, iy += INCY ) { A[iaij] += Y[iy] * t0; } } } #endif #ifdef HPL_CALL_FBLAS double alpha = ALPHA; #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77M = M, F77N = N, F77lda = LDA, F77incx = INCX, F77incy = INCY; #else #define F77M M #define F77N N #define F77lda LDA #define F77incx INCX #define F77incy INCY #endif if( ORDER == HplColumnMajor ) { F77dger( &F77M, &F77N, &alpha, X, &F77incx, Y, &F77incy, A, &F77lda ); } else { F77dger( &F77N, &F77M, &alpha, Y, &F77incy, X, &F77incx, A, &F77lda ); } #endif /* * End of HPL_dger */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_dscal.c0000644000000000000000000001546411256503657013575 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifndef HPL_dscal

#ifdef HPL_STDC_HEADERS
void HPL_dscal
(
   const int                        N,
   const double                     ALPHA,
   double *                         X,
   const int                        INCX
)
#else
void HPL_dscal
( N, ALPHA, X, INCX )
   const int                        N;
   const double                     ALPHA;
   double *                         X;
   const int                        INCX;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dscal scales the vector x by alpha.
 *
 *
 * Arguments
 * =========
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the vector x. N  must  be
 *         at least zero.
 *
 * ALPHA   (local input)                 const double
 *         On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
 *         supplied as zero, then the entries of the incremented array X
 *         need not be set on input.
 *
 * X       (local input/output)          double *
 *         On entry,  X  is an incremented array of dimension  at  least
 *         ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
 *         On exit, the entries of the incremented array  X  are  scaled
 *         by the scalar alpha.
 *
 * INCX    (local input)                 const int
 *         On entry, INCX specifies the increment for the elements of X.
 *         INCX must not be zero.
 *
 * ---------------------------------------------------------------------
 */
#ifdef HPL_CALL_CBLAS
   cblas_dscal( N, ALPHA, X, INCX );
#endif
#ifdef HPL_CALL_VSIPL
/*
 * Fallback implementation.  The vector is processed in groups of eight
 * elements (nu = N rounded down to a multiple of 8) followed by a
 * remainder loop of N - nu single elements.  X itself is advanced as
 * the cursor; StX marks the address at which the unrolled loop stops.
 */
   register double           x0, x1, x2, x3, x4, x5, x6, x7;
   register const double     alpha = ALPHA;
   const double              * StX;
   register int              i;
   int                       nu;
   const int                 incX2 = 2 * INCX, incX3 = 3 * INCX,
                             incX4 = 4 * INCX, incX5 = 5 * INCX,
                             incX6 = 6 * INCX, incX7 = 7 * INCX,
                             incX8 = 8 * INCX;

/*
 * Scaling by one is a no-op, so it is skipped together with N <= 0.
 */
   if( ( N > 0 ) && ( alpha != HPL_rone ) )
   {
      if( alpha == HPL_rzero )
      {
/*
 * alpha is zero: store zeros without reading X, so the input entries
 * need not be set.
 */
         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
         {
            StX = (double *)X + nu * INCX;

            do
            {
               (*X)     = HPL_rzero; X[incX4] = HPL_rzero;
               X[INCX ] = HPL_rzero; X[incX5] = HPL_rzero;
               X[incX2] = HPL_rzero; X[incX6] = HPL_rzero;
               X[incX3] = HPL_rzero; X[incX7] = HPL_rzero;

               X  += incX8;

            } while( X != StX );
         }

/*
 * Remaining N - nu (at most seven) elements.
 */
         for( i = N - nu; i != 0; i-- )
         {
            *X = HPL_rzero;
            X  += INCX;
         }
      }
      else
      {
/*
 * General case: load eight elements, scale them, store them back.
 */
         if( ( nu = ( N >> 3 ) << 3 ) != 0 )
         {
            StX = X + nu * INCX;

            do
            {
               x0 = (*X);     x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5];
               x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7];

               x0 *= alpha;   x4 *= alpha;   x1 *= alpha;   x5 *= alpha;
               x2 *= alpha;   x6 *= alpha;   x3 *= alpha;   x7 *= alpha;

               (*X)     = x0; X[incX4] = x4; X[INCX ] = x1; X[incX5] = x5;
               X[incX2] = x2; X[incX6] = x6; X[incX3] = x3; X[incX7] = x7;

               X  += incX8;

            } while( X != StX );
         }

/*
 * Remaining N - nu (at most seven) elements.
 */
         for( i = N - nu; i != 0; i-- )
         {
            x0  = (*X);
            x0 *= alpha;
            *X  = x0;
            X  += INCX;
         }
      }
   }
#endif
#ifdef HPL_CALL_FBLAS
/*
 * Fortran takes every argument by reference; alpha is copied into an
 * addressable local.
 */
   double                    alpha = ALPHA;
#ifdef HPL_USE_F77_INTEGER_DEF
   const F77_INTEGER         F77N = N, F77incx = INCX;
#else
#define F77N                 N
#define F77incx              INCX
#endif

   F77dscal( &F77N, &alpha, X, &F77incx );
#endif
/*
 * End of HPL_dscal
 */
}

#endif
hpcc-1.4.1/hpl/src/blas/HPL_dtrsm.c0000644000000000000000000007776211256503657013651 00000000000000
/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P. Petitet
 * University of Tennessee, Knoxville
 * Innovative Computing Laboratory
 * (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dtrsm #ifdef HPL_CALL_VSIPL #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLLNN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iaik, ibij, ibkj, j, jak, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) { B[ibkj] /= A[k+jak]; for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; i < M; i++, iaik +=1, ibij += 1 ) { B[ibij] -= B[ibkj] * A[iaik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLLNU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iaik, ibij, ibkj, j, jak, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, ibij= jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = 0, jak = 0, ibkj = jbj; k < M; k++, jak += LDA, ibkj += 1 ) { for( i = k+1, iaik = k+1+jak, ibij = k+1+jbj; i < M; i++, iaik +=1, ibij += 1 ) { B[ibij] -= B[ibkj] * 
A[iaik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLLTN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iaki, ibij, ibkj, j, jai, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; i >= 0; i--, jai -= LDA, ibij -= 1 ) { t0 = ALPHA * B[ibij]; for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; k < M; k++, iaki += 1, ibkj += 1 ) { t0 -= A[iaki] * B[ibkj]; } t0 /= A[i+jai]; B[ibij] = t0; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLLTU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iaki, ibij, ibkj, j, jai, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = M-1, jai = (M-1)*LDA, ibij = M-1+jbj; i >= 0; i--, jai -= LDA, ibij -= 1 ) { t0 = ALPHA * B[ibij]; for( k = i+1, iaki = i+1+jai, ibkj = i+1+jbj; k < M; k++, iaki += 1, ibkj += 1 ) { t0 -= A[iaki] * B[ibkj]; } B[ibij] = t0; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLUNN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iaik, ibij, ibkj, j, jak, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; k >= 0; k--, jak -= LDA, ibkj -= 1 ) { B[ibkj] /= A[k+jak]; for( i = 0, iaik = jak, ibij = jbj; i < k; i++, iaik += 1, ibij += 1 ) { B[ibij] -= 
B[ibkj] * A[iaik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLUNU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iaik, ibij, ibkj, j, jak, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = M-1, jak = (M-1)*LDA, ibkj = M-1+jbj; k >= 0; k--, jak -= LDA, ibkj -= 1 ) { for( i = 0, iaik = jak, ibij = jbj; i < k; i++, iaik += 1, ibij += 1 ) { B[ibij] -= B[ibkj] * A[iaik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLUTN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iaki, ibij, ibkj, j, jai, jbj, k; register double t0; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) { t0 = ALPHA * B[ibij]; for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) { t0 -= A[iaki] * B[ibkj]; } t0 /= A[i+jai]; B[ibij] = t0; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmLUTU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iaki, ibij, ibkj, j, jai, jbj, k; for( j = 0, jbj = 0; j < N; j++, jbj += LDB ) { for( i = 0, jai = 0, ibij = jbj; i < M; i++, jai += LDA, ibij += 1 ) { t0 = ALPHA * B[ibij]; for( k = 0, iaki = jai, ibkj = jbj; k < i; k++, iaki += 1, ibkj += 1 ) { t0 -= A[iaki] * B[ibkj]; } B[ibij] = t0; } } } #ifdef HPL_STDC_HEADERS static void 
HPL_dtrsmRLNN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; j >= 0; j--, jaj -= LDA, jbj -= LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; k < N; k++, iakj += 1, jbk += LDB ) { for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= A[iakj] * B[ibik]; } } for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRLNU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; for( j = N-1, jaj = (N-1)*LDA, jbj = (N-1)*LDB; j >= 0; j--, jaj -= LDA, jbj -= LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = j+1, iakj = j+1+jaj, jbk = (j+1)*LDB; k < N; k++, iakj += 1, jbk += LDB ) { for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= A[iakj] * B[ibik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRLTN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) { for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } for( j = k+1, iajk = (k+1)+jak, 
jbj = (k+1)*LDB; j < N; j++, iajk += 1, jbj += LDB ) { t0 = A[iajk]; for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= t0 * B[ibik]; } } for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRLTU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; for( k = 0, jak = 0, jbk = 0; k < N; k++, jak += LDA, jbk += LDB ) { for( j = k+1, iajk = (k+1)+jak, jbj = (k+1)*LDB; j < N; j++, iajk += 1, jbj += LDB ) { t0 = A[iajk]; for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= t0 * B[ibik]; } } for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRUNN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) { for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= A[iakj] * B[ibik]; } } for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] /= A[j+jaj]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRUNU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; 
double * B; #endif { int i, iakj, ibij, ibik, j, jaj, jbj, jbk, k; for( j = 0, jaj = 0, jbj = 0; j < N; j++, jaj += LDA, jbj += LDB ) { for( i = 0, ibij = jbj; i < M; i++, ibij += 1 ) { B[ibij] *= ALPHA; } for( k = 0, iakj = jaj, jbk = 0; k < j; k++, iakj += 1, jbk += LDB ) { for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= A[iakj] * B[ibik]; } } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRUTN ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; k >= 0; k--, jak -= LDA, jbk -= LDB ) { for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] /= A[k+jak]; } for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) { t0 = A[iajk]; for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= t0 * B[ibik]; } } for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsmRUTU ( const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ) const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { register double t0; int i, iajk, ibij, ibik, j, jak, jbj, jbk, k; for( k = N-1, jak = (N-1)*LDA, jbk = (N-1)*LDB; k >= 0; k--, jak -= LDA, jbk -= LDB ) { for( j = 0, iajk = jak, jbj = 0; j < k; j++, iajk += 1, jbj += LDB ) { t0 = A[iajk]; for( i = 0, ibij = jbj, ibik = jbk; i < M; i++, ibij += 1, ibik += 1 ) { B[ibij] -= t0 * B[ibik]; } } for( i = 0, ibik = jbk; i < M; i++, ibik += 1 ) { B[ibik] *= ALPHA; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsm0 ( const enum HPL_SIDE SIDE, const enum HPL_UPLO 
UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else static void HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) const enum HPL_SIDE SIDE; const enum HPL_UPLO UPLO; const enum HPL_TRANS TRANS; const enum HPL_DIAG DIAG; const int LDA, LDB, M, N; const double ALPHA; const double * A; double * B; #endif { int i, j; if( ( M == 0 ) || ( N == 0 ) ) return; if( ALPHA == HPL_rzero ) { for( j = 0; j < N; j++ ) { for( i = 0; i < M; i++ ) *(B+i+j*LDB) = HPL_rzero; } return; } if( SIDE == HplLeft ) { if( UPLO == HplUpper ) { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsmLUNN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmLUNU( M, N, ALPHA, A, LDA, B, LDB ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsmLUTN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmLUTU( M, N, ALPHA, A, LDA, B, LDB ); } } } else { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsmLLNN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmLLNU( M, N, ALPHA, A, LDA, B, LDB ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsmLLTN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmLLTU( M, N, ALPHA, A, LDA, B, LDB ); } } } } else { if( UPLO == HplUpper ) { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsmRUNN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmRUNU( M, N, ALPHA, A, LDA, B, LDB ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsmRUTN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmRUTU( M, N, ALPHA, A, LDA, B, LDB ); } } } else { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsmRLNN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmRLNU( M, N, ALPHA, A, LDA, B, LDB ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsmRLTN( M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsmRLTU( M, N, ALPHA, A, LDA, B, LDB ); } } } } } #endif #ifdef HPL_STDC_HEADERS void HPL_dtrsm ( const enum HPL_ORDER ORDER, const 
enum HPL_SIDE SIDE, const enum HPL_UPLO UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB ) #else void HPL_dtrsm ( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ) const enum HPL_ORDER ORDER; const enum HPL_SIDE SIDE; const enum HPL_UPLO UPLO; const enum HPL_TRANS TRANS; const enum HPL_DIAG DIAG; const int M; const int N; const double ALPHA; const double * A; const int LDA; double * B; const int LDB; #endif { /* * Purpose * ======= * * HPL_dtrsm solves one of the matrix equations * * op( A ) * X = alpha * B, or X * op( A ) = alpha * B, * * where alpha is a scalar, X and B are m by n matrices, A is a unit, or * non-unit, upper or lower triangular matrix and op(A) is one of * * op( A ) = A or op( A ) = A^T. * * The matrix X is overwritten on B. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Arguments * ========= * * ORDER (local input) const enum HPL_ORDER * On entry, ORDER specifies the storage format of the operands * as follows: * ORDER = HplRowMajor, * ORDER = HplColumnMajor. * * SIDE (local input) const enum HPL_SIDE * On entry, SIDE specifies whether op(A) appears on the left * or right of X as follows: * SIDE==HplLeft op( A ) * X = alpha * B, * SIDE==HplRight X * op( A ) = alpha * B. * * UPLO (local input) const enum HPL_UPLO * On entry, UPLO specifies whether the upper or lower * triangular part of the array A is to be referenced. When * UPLO==HplUpper, only the upper triangular part of A is to be * referenced, otherwise only the lower triangular part of A is * to be referenced. * * TRANS (local input) const enum HPL_TRANS * On entry, TRANSA specifies the form of op(A) to be used in * the matrix-matrix operation follows: * TRANSA==HplNoTrans : op( A ) = A, * TRANSA==HplTrans : op( A ) = A^T, * TRANSA==HplConjTrans : op( A ) = A^T. 
* * DIAG (local input) const enum HPL_DIAG * On entry, DIAG specifies whether A is unit triangular or * not. When DIAG==HplUnit, A is assumed to be unit triangular, * and otherwise, A is not assumed to be unit triangular. * * M (local input) const int * On entry, M specifies the number of rows of the matrix B. * M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the matrix B. * N must be at least zero. * * ALPHA (local input) const double * On entry, ALPHA specifies the scalar alpha. When ALPHA is * supplied as zero then the elements of the matrix B need not * be set on input. * * A (local input) const double * * On entry, A points to an array of size equal to or greater * than LDA * k, where k is m when SIDE==HplLeft and is n * otherwise. Before entry with UPLO==HplUpper, the leading * k by k upper triangular part of the array A must contain the * upper triangular matrix and the strictly lower triangular * part of A is not referenced. When UPLO==HplLower on entry, * the leading k by k lower triangular part of the array A must * contain the lower triangular matrix and the strictly upper * triangular part of A is not referenced. * * Note that when DIAG==HplUnit, the diagonal elements of A * are not referenced either, but are assumed to be unity. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of A as * declared in the calling (sub) program. LDA must be at * least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise. * * B (local input/output) double * * On entry, B points to an array of size equal to or greater * than LDB * n. Before entry, the leading m by n part of the * array B must contain the matrix B, except when alpha is zero, * in which case B need not be set on entry. On exit, the array * B is overwritten by the m by n solution matrix. * * LDB (local input) const int * On entry, LDB specifies the leading dimension of B as * declared in the calling (sub) program. 
LDB must be at * least MAX(1,m). * * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dtrsm( ORDER, SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); #endif #ifdef HPL_CALL_VSIPL if( ORDER == HplColumnMajor ) { HPL_dtrsm0( SIDE, UPLO, TRANS, DIAG, M, N, ALPHA, A, LDA, B, LDB ); } else { HPL_dtrsm0( ( SIDE == HplRight ? HplLeft : HplRight ), ( UPLO == HplLower ? HplUpper : HplLower ), TRANS, DIAG, N, M, ALPHA, A, LDA, B, LDB ); } #endif #ifdef HPL_CALL_FBLAS double alpha = ALPHA; #ifdef StringSunStyle #if defined( HPL_USE_F77_INTEGER_DEF ) F77_INTEGER IONE = 1; #else int IONE = 1; #endif #endif #ifdef StringStructVal F77_CHAR fside; F77_CHAR fuplo; F77_CHAR ftran; F77_CHAR fdiag; #endif #ifdef StringStructPtr F77_CHAR fside; F77_CHAR fuplo; F77_CHAR ftran; F77_CHAR fdiag; #endif #ifdef StringCrayStyle F77_CHAR fside; F77_CHAR fuplo; F77_CHAR ftran; F77_CHAR fdiag; #endif #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77M = M, F77N = N, F77lda = LDA, F77ldb = LDB; #else #define F77M M #define F77N N #define F77lda LDA #define F77ldb LDB #endif char cside, cuplo, ctran, cdiag; if( TRANS == HplNoTrans ) ctran = 'N'; else if( TRANS == HplTrans ) ctran = 'T'; else ctran = 'C'; cdiag = ( DIAG == HplUnit ? 'U' : 'N' ); if( ORDER == HplColumnMajor ) { cside = ( SIDE == HplRight ? 'R' : 'L' ); cuplo = ( UPLO == HplLower ? 
'L' : 'U' ); #ifdef StringSunStyle F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77M, &F77N, &alpha, A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); #endif #ifdef StringCrayStyle fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, A, &F77lda, B, &F77ldb ); #endif #ifdef StringStructVal fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsm( fside, fuplo, ftran, fdiag, &F77M, &F77N, &alpha, A, &F77lda, B, &F77ldb ); #endif #ifdef StringStructPtr fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77M, &F77N, &alpha, A, &F77lda, B, &F77ldb ); #endif } else { cside = ( SIDE == HplRight ? 'L' : 'R' ); cuplo = ( UPLO == HplLower ? 'U' : 'L' ); #ifdef StringSunStyle F77dtrsm( &cside, &cuplo, &ctran, &cdiag, &F77N, &F77M, &alpha, A, &F77lda, B, &F77ldb, IONE, IONE, IONE, IONE ); #endif #ifdef StringCrayStyle fside = HPL_C2F_CHAR( cside ); fuplo = HPL_C2F_CHAR( cuplo ); ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, A, &F77lda, B, &F77ldb ); #endif #ifdef StringStructVal fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsm( fside, fuplo, ftran, fdiag, &F77N, &F77M, &alpha, A, &F77lda, B, &F77ldb ); #endif #ifdef StringStructPtr fside.len = 1; fside.cp = &cside; fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsm( &fside, &fuplo, &ftran, &fdiag, &F77N, &F77M, &alpha, A, &F77lda, B, &F77ldb ); #endif } #endif /* * End of HPL_dtrsm */ } #endif 
hpcc-1.4.1/hpl/src/blas/HPL_dtrsv.c0000644000000000000000000004176711256503657013656 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_dtrsv #ifdef HPL_CALL_VSIPL #ifdef HPL_STDC_HEADERS static void HPL_dtrsvLNN ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvLNN( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; register double t0; for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) { X[jx] /= A[jaj]; t0 = X[jx]; for( i = j+1, iaij = jaj+1, ix = jx + INCX; i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvLNU ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvLNU( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; register double t0; for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += ldap1, jx += INCX ) { t0 = X[jx]; for( i = j+1, iaij = jaj+1, ix = jx + INCX; i < N; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvLTN ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvLTN( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; register double 
t0; for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; j >= 0; j--, jaj -= ldap1, jx -= INCX ) { t0 = X[jx]; for( i = j+1, iaij = 1+jaj, ix = jx + INCX; i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } t0 /= A[jaj]; X[jx] = t0; } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvLTU ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvLTU( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx, ldap1 = LDA + 1; register double t0; for( j = N-1, jaj = (N-1)*(ldap1), jx = (N-1)*INCX; j >= 0; j--, jaj -= ldap1, jx -= INCX ) { t0 = X[jx]; for( i = j+1, iaij = 1+jaj, ix = jx + INCX; i < N; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } X[jx] = t0; } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvUNN ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvUNN( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx; register double t0; for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; j >= 0; j--, jaj -= LDA, jx -= INCX ) { X[jx] /= A[j+jaj]; t0 = X[jx]; for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvUNU ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvUNU( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx; register double t0; for( j = N-1, jaj = (N-1)*LDA, jx = (N-1)*INCX; j >= 0; j--, jaj -= LDA, jx -= INCX ) { t0 = X[jx]; for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) { X[ix] -= t0 * A[iaij]; } } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvUTN ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvUTN( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double 
* A; double * X; #endif { int i, iaij, ix, j, jaj, jx; register double t0; for( j = 0, jaj = 0,jx = 0; j < N; j++, jaj += LDA, jx += INCX ) { t0 = X[jx]; for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } t0 /= A[iaij]; X[jx] = t0; } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsvUTU ( const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsvUTU( N, A, LDA, X, INCX ) const int INCX, LDA, N; const double * A; double * X; #endif { int i, iaij, ix, j, jaj, jx; register double t0; for( j = 0, jaj = 0, jx = 0; j < N; j++, jaj += LDA, jx += INCX ) { t0 = X[jx]; for( i = 0, iaij = jaj, ix = 0; i < j; i++, iaij += 1, ix += INCX ) { t0 -= A[iaij] * X[ix]; } X[jx] = t0; } } #ifdef HPL_STDC_HEADERS static void HPL_dtrsv0 ( const enum HPL_UPLO UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int N, const double * A, const int LDA, double * X, const int INCX ) #else static void HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) const enum HPL_UPLO UPLO; const enum HPL_TRANS TRANS; const enum HPL_DIAG DIAG; const int INCX, LDA, N; const double * A; double * X; #endif { if( N == 0 ) return; if( UPLO == HplUpper ) { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsvUNN( N, A, LDA, X, INCX ); } else { HPL_dtrsvUNU( N, A, LDA, X, INCX ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsvUTN( N, A, LDA, X, INCX ); } else { HPL_dtrsvUTU( N, A, LDA, X, INCX ); } } } else { if( TRANS == HplNoTrans ) { if( DIAG == HplNonUnit ) { HPL_dtrsvLNN( N, A, LDA, X, INCX ); } else { HPL_dtrsvLNU( N, A, LDA, X, INCX ); } } else { if( DIAG == HplNonUnit ) { HPL_dtrsvLTN( N, A, LDA, X, INCX ); } else { HPL_dtrsvLTU( N, A, LDA, X, INCX ); } } } } #endif #ifdef HPL_STDC_HEADERS void HPL_dtrsv ( const enum HPL_ORDER ORDER, const enum HPL_UPLO UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int N, const double * A, const int LDA, double * X, const int INCX ) #else void 
HPL_dtrsv ( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ) const enum HPL_ORDER ORDER; const enum HPL_UPLO UPLO; const enum HPL_TRANS TRANS; const enum HPL_DIAG DIAG; const int N; const double * A; const int LDA; double * X; const int INCX; #endif { /* * Purpose * ======= * * HPL_dtrsv solves one of the systems of equations * * A * x = b, or A^T * x = b, * * where b and x are n-element vectors and A is an n by n non-unit, or * unit, upper or lower triangular matrix. * * No test for singularity or near-singularity is included in this * routine. Such tests must be performed before calling this routine. * * Arguments * ========= * * ORDER (local input) const enum HPL_ORDER * On entry, ORDER specifies the storage format of the operands * as follows: * ORDER = HplRowMajor, * ORDER = HplColumnMajor. * * UPLO (local input) const enum HPL_UPLO * On entry, UPLO specifies whether the upper or lower * triangular part of the array A is to be referenced. When * UPLO==HplUpper, only the upper triangular part of A is to be * referenced, otherwise only the lower triangular part of A is * to be referenced. * * TRANS (local input) const enum HPL_TRANS * On entry, TRANS specifies the equations to be solved as * follows: * TRANS==HplNoTrans A * x = b, * TRANS==HplTrans A^T * x = b. * * DIAG (local input) const enum HPL_DIAG * On entry, DIAG specifies whether A is unit triangular or * not. When DIAG==HplUnit, A is assumed to be unit triangular, * and otherwise, A is not assumed to be unit triangular. * * N (local input) const int * On entry, N specifies the order of the matrix A. N must be at * least zero. * * A (local input) const double * * On entry, A points to an array of size equal to or greater * than LDA * n. Before entry with UPLO==HplUpper, the leading * n by n upper triangular part of the array A must contain the * upper triangular matrix and the strictly lower triangular * part of A is not referenced. 
When UPLO==HplLower on entry, * the leading n by n lower triangular part of the array A must * contain the lower triangular matrix and the strictly upper * triangular part of A is not referenced. * * Note that when DIAG==HplUnit, the diagonal elements of A * not referenced either, but are assumed to be unity. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of A as * declared in the calling (sub) program. LDA must be at * least MAX(1,n). * * X (local input/output) double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * Before entry, the incremented array X must contain the n * element right-hand side vector b. On exit, X is overwritten * with the solution vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. * * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS cblas_dtrsv( ORDER, UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); #endif #ifdef HPL_CALL_VSIPL if( ORDER == HplColumnMajor ) { HPL_dtrsv0( UPLO, TRANS, DIAG, N, A, LDA, X, INCX ); } else { HPL_dtrsv0( ( UPLO == HplUpper ? HplLower : HplUpper ), ( TRANS == HplNoTrans ? HplTrans : HplNoTrans ), DIAG, N, A, LDA, X, INCX ); } #endif #ifdef HPL_CALL_FBLAS #ifdef StringSunStyle #ifdef HPL_USE_F77_INTEGER_DEF F77_INTEGER IONE = 1; #else int IONE = 1; #endif #endif #ifdef StringStructVal F77_CHAR fuplo, ftran, fdiag; #endif #ifdef StringStructPtr F77_CHAR fuplo, ftran, fdiag; #endif #ifdef StringCrayStyle F77_CHAR fuplo, ftran, fdiag; #endif #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77N = N, F77lda = LDA, F77incx = INCX; #else #define F77N N #define F77lda LDA #define F77incx INCX #endif char cuplo, ctran, cdiag; if( ORDER == HplColumnMajor ) { cuplo = ( UPLO == HplUpper ? 'U' : 'L' ); ctran = ( TRANS == HplNoTrans ? 'N' : 'T' ); } else { cuplo = ( UPLO == HplUpper ? 
'L' : 'U' ); ctran = ( TRANS == HplNoTrans ? 'T' : 'N' ); } cdiag = ( DIAG == HplNonUnit ? 'N' : 'U' ); #ifdef StringSunStyle F77dtrsv( &cuplo, &ctran, &cdiag, &F77N, A, &F77lda, X, &F77incx, IONE, IONE, IONE ); #endif #ifdef StringCrayStyle ftran = HPL_C2F_CHAR( ctran ); fdiag = HPL_C2F_CHAR( cdiag ); fuplo = HPL_C2F_CHAR( cuplo ); F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); #endif #ifdef StringStructVal fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsv( fuplo, ftran, fdiag, &F77N, A, &F77lda, X, &F77incx ); #endif #ifdef StringStructPtr fuplo.len = 1; fuplo.cp = &cuplo; ftran.len = 1; ftran.cp = &ctran; fdiag.len = 1; fdiag.cp = &cdiag; F77dtrsv( &fuplo, &ftran, &fdiag, &F77N, A, &F77lda, X, &F77incx ); #endif #endif /* * End of HPL_dtrsv */ } #endif hpcc-1.4.1/hpl/src/blas/HPL_idamax.c0000644000000000000000000001501711256503657013744 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifndef HPL_idamax #ifdef HPL_STDC_HEADERS int HPL_idamax ( const int N, const double * X, const int INCX ) #else int HPL_idamax ( N, X, INCX ) const int N; const double * X; const int INCX; #endif { /* * Purpose * ======= * * HPL_idamax returns the index in an n-vector x of the first element * having maximum absolute value. * * In the VSIPL and Fortran BLAS code paths the returned index is * zero-based (the VSIPL loop counts from 0; the Fortran path subtracts * one from the F77idamax result) and 0 is returned when N is not * positive. The CBLAS path returns the cblas_idamax result unchanged. * * Arguments * ========= * * N (local input) const int * On entry, N specifies the length of the vector x. N must be * at least zero. * * X (local input) const double * * On entry, X is an incremented array of dimension at least * ( 1 + ( n - 1 ) * abs( INCX ) ) that contains the vector x. * * INCX (local input) const int * On entry, INCX specifies the increment for the elements of X. * INCX must not be zero. 
* * --------------------------------------------------------------------- */ #ifdef HPL_CALL_CBLAS return( (int)(cblas_idamax( N, X, INCX )) ); #endif #ifdef HPL_CALL_VSIPL register double absxi, smax = HPL_rzero, x0, x1, x2, x3, x4, x5, x6, x7; const double * StX; register int imax = 0, i = 0, j; int nu; const int incX2 = 2 * INCX, incX3 = 3 * INCX, incX4 = 4 * INCX, incX5 = 5 * INCX, incX6 = 6 * INCX, incX7 = 7 * INCX, incX8 = 8 * INCX; if( N > 0 ) { /* nu is N rounded down to a multiple of 8; the do-loop below consumes 8 elements per pass and stops when X reaches StX = X + nu * INCX */ if( ( nu = ( N >> 3 ) << 3 ) != 0 ) { StX = X + nu * INCX; do { x0 = (*X); x4 = X[incX4]; x1 = X[INCX ]; x5 = X[incX5]; x2 = X[incX2]; x6 = X[incX6]; x3 = X[incX3]; x7 = X[incX7]; /* strict '>' keeps the first element among equal maxima */ absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x1 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x2 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x3 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x4 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x5 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x6 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; absxi = Mabs( x7 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; X += incX8; } while( X != StX ); } /* handle the remaining N - nu elements one at a time */ for( j = N - nu; j != 0; j-- ) { x0 = (*X); absxi = Mabs( x0 ); if( absxi > smax ) { imax = i; smax = absxi; } i += 1; X += INCX; } } return( imax ); #endif #ifdef HPL_CALL_FBLAS #ifdef HPL_USE_F77_INTEGER_DEF const F77_INTEGER F77N = N, F77incx = INCX; #else #define F77N N #define F77incx INCX #endif int imax = 0; /* subtract one from the Fortran result to obtain a zero-based index; imax stays 0 when N is not positive */ if( N > 0 ) imax = F77idamax( &F77N, X, &F77incx ) - 1; return( imax ); #endif /* * End of HPL_idamax */ } #endif hpcc-1.4.1/hpl/src/comm/HPL_1rinM.c0000644000000000000000000001716511256503657013507 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #ifdef HPL_STDC_HEADERS int HPL_binit_1rinM ( HPL_T_panel * PANEL ) #else int HPL_binit_1rinM( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_binit_1rinM prepares PANEL for the broadcast performed by * HPL_bcast_1rinM. It returns HPL_SUCCESS immediately when PANEL is * NULL or the grid has at most one process column. With * HPL_USE_MPI_DATATYPE it calls HPL_copyL only if HPL_COPY_L is defined, * then HPL_packL, and returns HPL_FAILURE if HPL_packL does not return * MPI_SUCCESS. Without HPL_USE_MPI_DATATYPE it always calls HPL_copyL * and returns HPL_SUCCESS. */ #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif /* * Create the MPI user-defined data type */ ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); return( HPL_SUCCESS ); #endif } /* * _M_BUFF, _M_COUNT and _M_TYPE are the buffer, count and datatype * arguments handed to MPI_Send / MPI_Recv in HPL_bcast_1rinM below. */ #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF PANEL->buffers[0] #define _M_COUNT PANEL->counts[0] #define _M_TYPE PANEL->dtypes[0] #else #define _M_BUFF (void *)(PANEL->L2) #define _M_COUNT PANEL->len #define _M_TYPE MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_1rinM ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_1rinM( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * HPL_bcast_1rinM performs one attempt at moving the panel along the * process row. The value stored in *IFLAG is also the return value, * except on the two early exits below, which store and return * HPL_SUCCESS: HPL_SUCCESS when this process has sent, or received and * if required forwarded, the panel; HPL_KEEP_TESTING when MPI_Iprobe * reports no pending message (call again later); HPL_FAILURE when an MPI * call did not return MPI_SUCCESS. */ /* * .. Local Variables .. */ MPI_Comm comm; int ierr, go, next, msgid, partner, prev, rank, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: If I am the root process, then send message to its two * next neighbors. Otherwise, probe for message. 
If the message is here, * then receive it, and if I am not the last process of the ring, or * just after the root process, then forward it to the next. Otherwise, * inform the caller that the panel has still not been received. */ rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; root = PANEL->pcol; msgid = PANEL->msgid; next = MModAdd1( rank, size ); /* presumably the ring successor of rank; MModAdd1 is defined elsewhere */ if( rank == root ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); /* second send goes to the successor of next, only when there are more than two process columns */ if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, size ), msgid, comm ); } } else { prev = MModSub1( rank, size ); /* the process whose predecessor's predecessor is the root is served by the root itself; every other process receives from its predecessor */ if( ( size > 2 ) && ( MModSub1( prev, size ) == root ) ) partner = root; else partner = prev; ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { if( go != 0 ) { ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, comm, &PANEL->status[0] ); /* no forwarding when prev is the root (next is already served by the root) or when next is the root */ if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && ( next != root ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); } } else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } } } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occurred in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_1rinM ( HPL_T_panel * PANEL ) #else int HPL_bwait_1rinM( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_bwait_1rinM finishes the broadcast for PANEL. It returns * HPL_SUCCESS when PANEL is NULL or the grid has at most one process * column. With HPL_USE_MPI_DATATYPE it frees PANEL->dtypes[0] with * MPI_Type_free and returns HPL_FAILURE if that call fails; otherwise * there is nothing to release and HPL_SUCCESS is returned. */ #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } /* * Release the arrays of request / status / data-types and buffers */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_free( &PANEL->dtypes[0] ); return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); #else return( HPL_SUCCESS ); #endif } hpcc-1.4.1/hpl/src/comm/HPL_1ring.c0000644000000000000000000001637111256503657013537 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #ifdef HPL_STDC_HEADERS int HPL_binit_1ring ( HPL_T_panel * PANEL ) #else int HPL_binit_1ring( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_binit_1ring prepares PANEL for the broadcast performed by * HPL_bcast_1ring. It returns HPL_SUCCESS immediately when PANEL is * NULL or the grid has at most one process column. With * HPL_USE_MPI_DATATYPE it calls HPL_copyL only if HPL_COPY_L is defined, * then HPL_packL, and returns HPL_FAILURE if HPL_packL does not return * MPI_SUCCESS. Without HPL_USE_MPI_DATATYPE it always calls HPL_copyL * and returns HPL_SUCCESS. */ #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif /* * Create the MPI user-defined data type */ ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); return( HPL_SUCCESS ); #endif } /* * _M_BUFF, _M_COUNT and _M_TYPE are the buffer, count and datatype * arguments handed to MPI_Send / MPI_Recv in HPL_bcast_1ring below. */ #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF PANEL->buffers[0] #define _M_COUNT PANEL->counts[0] #define _M_TYPE PANEL->dtypes[0] #else #define _M_BUFF (void *)(PANEL->L2) #define _M_COUNT PANEL->len #define _M_TYPE MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_1ring ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_1ring( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * HPL_bcast_1ring performs one attempt at moving the panel along the * process row. The value stored in *IFLAG is also the return value, * except on the two early exits below, which store and return * HPL_SUCCESS: HPL_SUCCESS when this process has sent, or received and * if required forwarded, the panel; HPL_KEEP_TESTING when MPI_Iprobe * reports no pending message (call again later); HPL_FAILURE when an MPI * call did not return MPI_SUCCESS. */ /* * .. Local Variables .. */ MPI_Comm comm; int ierr, go, next, msgid, prev, rank, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: If I am the root process, start spreading the panel. If * I am not the root process, probe for message. If the message is here, * then receive it, and if I am not the last process of the ring, then * forward it to the next. Otherwise, inform the caller that the panel * has still not been received. */ rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; root = PANEL->pcol; msgid = PANEL->msgid; if( rank == root ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( rank, size ), msgid, comm ); } else { prev = MModSub1( rank, size ); /* presumably the ring predecessor of rank; MModSub1 is defined elsewhere */ ierr = MPI_Iprobe( prev, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { if( go != 0 ) { ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, prev, msgid, comm, &PANEL->status[0] ); next = MModAdd1( rank, size ); /* the process whose successor is the root does not forward */ if( ( ierr == MPI_SUCCESS ) && ( next != root ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); } } else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } } } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occurred in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_1ring ( HPL_T_panel * PANEL ) #else int HPL_bwait_1ring( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_bwait_1ring finishes the broadcast for PANEL. It returns * HPL_SUCCESS when PANEL is NULL or the grid has at most one process * column. With HPL_USE_MPI_DATATYPE it frees PANEL->dtypes[0] with * MPI_Type_free and returns HPL_FAILURE if that call fails; otherwise * there is nothing to release and HPL_SUCCESS is returned. */ #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } /* * Release the arrays of request / status / data-types and buffers */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_free( &PANEL->dtypes[0] ); return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); #else return( HPL_SUCCESS ); #endif } hpcc-1.4.1/hpl/src/comm/HPL_2rinM.c0000644000000000000000000001775211256503657013512 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission.
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #ifdef HPL_STDC_HEADERS int HPL_binit_2rinM ( HPL_T_panel * PANEL ) #else int HPL_binit_2rinM( PANEL ) HPL_T_panel * PANEL; #endif { #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif /* * Create the MPI user-defined data type */ ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); return( HPL_SUCCESS ); #endif } #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF PANEL->buffers[0] #define _M_COUNT PANEL->counts[0] #define _M_TYPE PANEL->dtypes[0] #else #define _M_BUFF (void *)(PANEL->L2) #define _M_COUNT PANEL->len #define _M_TYPE MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_2rinM ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_2rinM( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * .. Local Variables .. */ MPI_Comm comm; int ierr, go, next, msgid, partner, prev, rank, roo2, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: root process send to its two right neighbors and mid-pro- * cess. If I am not the root process, probe for message. If the message * is there, then receive it. If I am not the last process of both rings * then forward it to the next. Otherwise, inform the caller that the * panel has still not been received. 
*/ rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; root = PANEL->pcol; msgid = PANEL->msgid; next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); roo2 = MModAdd( root, roo2, size ); if( rank == root ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) { if( MModAdd1( next, size ) != roo2 ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, MModAdd1( next, size ), msgid, comm ); } if( ierr == MPI_SUCCESS ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, comm ); } } } else { prev = MModSub1( rank, size ); if( ( prev == root ) || ( rank == roo2 ) || ( MModSub1( prev, size ) == root ) ) partner = root; else partner = prev; ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { if( go != 0 ) { ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, comm, &PANEL->status[0] ); if( ( ierr == MPI_SUCCESS ) && ( prev != root ) && ( next != roo2 ) && ( next != root ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); } } else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } } } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occured in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_2rinM ( HPL_T_panel * PANEL ) #else int HPL_bwait_2rinM( PANEL ) HPL_T_panel * PANEL; #endif { #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } /* * Release the arrays of request / status / data-types and buffers */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_free( &PANEL->dtypes[0] ); return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); #else return( HPL_SUCCESS ); #endif } hpcc-1.4.1/hpl/src/comm/HPL_2ring.c0000644000000000000000000001713511256503657013537 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #ifdef HPL_STDC_HEADERS int HPL_binit_2ring ( HPL_T_panel * PANEL ) #else int HPL_binit_2ring( PANEL ) HPL_T_panel * PANEL; #endif { #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif /* * Create the MPI user-defined data type */ ierr = HPL_packL( PANEL, 0, PANEL->len, 0 ); return( ( ierr == MPI_SUCCESS ? 
HPL_SUCCESS : HPL_FAILURE ) ); #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); return( HPL_SUCCESS ); #endif } #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF PANEL->buffers[0] #define _M_COUNT PANEL->counts[0] #define _M_TYPE PANEL->dtypes[0] #else #define _M_BUFF (void *)(PANEL->L2) #define _M_COUNT PANEL->len #define _M_TYPE MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_2ring ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_2ring( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * .. Local Variables .. */ MPI_Comm comm; int ierr, go, next, msgid, partner, rank, roo2, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: root process send to its right neighbor and mid-process. * If I am not the root process, probe for message. If the message is * there, then receive it, and if I am not the last process of both * rings, then forward it to the next. Otherwise, inform the caller that * the panel has still not been received. 
*/ rank = PANEL->grid->mycol; comm = PANEL->grid->row_comm; root = PANEL->pcol; msgid = PANEL->msgid; next = MModAdd1( rank, size ); roo2 = ( ( size + 1 ) >> 1 ); roo2 = MModAdd( root, roo2, size ); if( rank == root ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); if( ( ierr == MPI_SUCCESS ) && ( size > 2 ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, roo2, msgid, comm ); } } else { partner = MModSub1( rank, size ); if( ( partner == root ) || ( rank == roo2 ) ) partner = root; ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { if( go != 0 ) { ierr = MPI_Recv( _M_BUFF, _M_COUNT, _M_TYPE, partner, msgid, comm, &PANEL->status[0] ); if( ( ierr == MPI_SUCCESS ) && ( next != roo2 ) && ( next != root ) ) { ierr = MPI_Send( _M_BUFF, _M_COUNT, _M_TYPE, next, msgid, comm ); } } else { *IFLAG = HPL_KEEP_TESTING; return( *IFLAG ); } } } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occured in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_2ring ( HPL_T_panel * PANEL ) #else int HPL_bwait_2ring( PANEL ) HPL_T_panel * PANEL; #endif { #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ int ierr; #endif /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } /* * Release the arrays of request / status / data-types and buffers */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_free( &PANEL->dtypes[0] ); return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); #else return( HPL_SUCCESS ); #endif } hpcc-1.4.1/hpl/src/comm/HPL_bcast.c0000644000000000000000000001212211256503657013601 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_bcast ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast ( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * Purpose * ======= * * HPL_bcast broadcasts the current panel. Successful completion is * indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to * HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was * not completed, in which case this function should be called again. * * Arguments * ========= * * PANEL (input/output) HPL_T_panel * * On entry, PANEL points to the current panel data structure * being broadcast. * * IFLAG (output) int * * On exit, IFLAG indicates whether or not the broadcast has * occured. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int ierr; HPL_T_TOP top; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Retrieve the selected virtual broadcast topology */ top = PANEL->algo->btopo; switch( top ) { case HPL_1RING_M : ierr = HPL_bcast_1rinM( PANEL, IFLAG ); break; case HPL_1RING : ierr = HPL_bcast_1ring( PANEL, IFLAG ); break; case HPL_2RING_M : ierr = HPL_bcast_2rinM( PANEL, IFLAG ); break; case HPL_2RING : ierr = HPL_bcast_2ring( PANEL, IFLAG ); break; case HPL_BLONG_M : ierr = HPL_bcast_blonM( PANEL, IFLAG ); break; case HPL_BLONG : ierr = HPL_bcast_blong( PANEL, IFLAG ); break; default : ierr = HPL_SUCCESS; } return( ierr ); /* * End of HPL_bcast */ } hpcc-1.4.1/hpl/src/comm/HPL_binit.c0000644000000000000000000001104011256503657013610 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_binit ( HPL_T_panel * PANEL ) #else int HPL_binit ( PANEL ) HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_binit initializes a row broadcast. Successful completion is * indicated by the returned error code HPL_SUCCESS. * * Arguments * ========= * * PANEL (input/output) HPL_T_panel * * On entry, PANEL points to the current panel data structure * being broadcast. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int ierr; HPL_T_TOP top; /* .. * .. Executable Statements .. */ if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); /* * Retrieve the selected virtual broadcast topology */ top = PANEL->algo->btopo; switch( top ) { case HPL_1RING_M : ierr = HPL_binit_1rinM( PANEL ); break; case HPL_1RING : ierr = HPL_binit_1ring( PANEL ); break; case HPL_2RING_M : ierr = HPL_binit_2rinM( PANEL ); break; case HPL_2RING : ierr = HPL_binit_2ring( PANEL ); break; case HPL_BLONG_M : ierr = HPL_binit_blonM( PANEL ); break; case HPL_BLONG : ierr = HPL_binit_blong( PANEL ); break; default : ierr = HPL_SUCCESS; } return( ierr ); /* * End of HPL_binit */ } hpcc-1.4.1/hpl/src/comm/HPL_blonM.c0000644000000000000000000003676211256503657013574 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #define I_SEND 0 #define I_RECV 1 #ifdef HPL_STDC_HEADERS int HPL_binit_blonM ( HPL_T_panel * PANEL ) #else int HPL_binit_blonM( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_binit_blonM prepares PANEL for HPL_bcast_blonM: unless * HPL_USE_MPI_DATATYPE is defined without HPL_COPY_L, the panel is * copied into a contiguous buffer by HPL_copyL. Nothing is done when * PANEL is NULL or the grid has at most one process column. The * function always returns HPL_SUCCESS. * .. * .. Executable Statements ..
*/ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF_S1 PANEL->buffers[I_SEND] #define _M_COUNT_S1 PANEL->counts[I_SEND] #define _M_TYPE_S1 PANEL->dtypes[I_SEND] #define _M_BUFF_S2 PANEL->buffers[I_SEND] #define _M_COUNT_S2 PANEL->counts[I_SEND] #define _M_TYPE_S2 PANEL->dtypes[I_SEND] #define _M_BUFF_R1 PANEL->buffers[I_RECV] #define _M_COUNT_R1 PANEL->counts[I_RECV] #define _M_TYPE_R1 PANEL->dtypes[I_RECV] #define _M_BUFF_R2 PANEL->buffers[I_RECV] #define _M_COUNT_R2 PANEL->counts[I_RECV] #define _M_TYPE_R2 PANEL->dtypes[I_RECV] #define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] #define _M_ROLL_COUNT_S PANEL->counts[I_SEND] #define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] #define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] #define _M_ROLL_COUNT_R PANEL->counts[I_RECV] #define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] #else #define _M_BUFF_S1 (void *)(PANEL->L2) #define _M_COUNT_S1 PANEL->len #define _M_TYPE_S1 MPI_DOUBLE #define _M_BUFF_S2 (void *)(PANEL->L2 + ibuf) #define _M_COUNT_S2 lbuf #define _M_TYPE_S2 MPI_DOUBLE #define _M_BUFF_R1 (void *)(PANEL->L2) #define _M_COUNT_R1 PANEL->len #define _M_TYPE_R1 MPI_DOUBLE #define _M_BUFF_R2 (void *)(PANEL->L2 + ibuf) #define _M_COUNT_R2 lbuf #define _M_TYPE_R2 MPI_DOUBLE #define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) #define _M_ROLL_COUNT_S lbufS #define _M_ROLL_TYPE_S MPI_DOUBLE #define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) #define _M_ROLL_COUNT_R lbufR #define _M_ROLL_TYPE_R MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_blonM ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_blonM( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * HPL_bcast_blonM broadcasts PANEL along the process row. The root * first sends the whole panel to its right neighbor with a synchronous * send; that neighbor returns as soon as it has received it. The panel * is then cut into pieces of Mmax( len / ( npcol - 1 ), 1 ) elements * (the last piece takes the remainder) that are spread over the other * processes and finally rolled between neighbors for npcol - 2 steps. * Once an MPI call has failed, the remaining calls are skipped because * each one is guarded by ierr == MPI_SUCCESS. * * On return *IFLAG and the returned value are HPL_SUCCESS or * HPL_FAILURE. HPL_KEEP_TESTING is not produced as long as the probes * below stay disabled, since go is initialized to 1 and only a probe * would modify it. * * .. Local Variables ..
*/ MPI_Comm comm; int COUNT, count, go=1, ierr=MPI_SUCCESS, ibuf, ibufR, ibufS, dummy=0, indx, ip2=1, k, l, lbuf, lbufR, lbufS, mask=1, msgid, mydist, mydist2, next, npm1, npm2, partner, prev, rank, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: root process sends to its right neighbor, then spread * the panel on the other npcol - 2 processes. If I am not the root * process, probe for message received. If the message is there, then * receive it. If I am just after the root process, return. Otherwise, * keep spreading on those npcol - 2 processes. Otherwise, inform the * caller that the panel has still not been received. */ comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; root = PANEL->pcol; msgid = PANEL->msgid; prev = MModSub1( rank, size ); if( rank == root ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, 0, PANEL->len, I_SEND ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Ssend( _M_BUFF_S1, _M_COUNT_S1, _M_TYPE_S1, MModAdd1( rank, size ), msgid, comm ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); #endif } else if( prev == root ) { /* * This probing mechanism causes problems when lookahead is on. Too many * messages are exchanged in this virtual topology causing a hang on * some machines. It is currently disabled until a better understanding * is acquired.
* * ierr = MPI_Iprobe( root, msgid, comm, &go, &PANEL->status[0] ); */ if( ierr == MPI_SUCCESS ) { /* if panel is here, proceed (go keeps its initial value 1 while the probe above is disabled) */ if( go != 0 ) { #ifdef HPL_USE_MPI_DATATYPE ierr = HPL_packL( PANEL, 0, PANEL->len, I_RECV ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( _M_BUFF_R1, _M_COUNT_R1, _M_TYPE_R1, root, msgid, comm, &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); #endif } else { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } } } /* * if I am just after the root, exit now. The message receive completed * successfully, this guy is done. If there are only 2 processes in each * row of processes, we are done as well. */ if( ( prev == root ) || ( size == 2 ) ) { *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } /* * Otherwise, proceed with broadcast - Spread the panel across process * columns */ npm2 = ( npm1 = size - 1 ) - 1; COUNT = PANEL->len; k = npm2; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } if( rank == root ) mydist2 = ( mydist = 0 ); else mydist2 = ( mydist = MModSub( rank, root, size ) - 1 ); indx = ip2; count = COUNT / npm1; count = Mmax( count, 1 ); do { mask ^= ip2; if( ( mydist & mask ) == 0 ) { lbuf = COUNT - ( ibuf = indx * count ); if( indx + ip2 < npm1 ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } partner = mydist ^ ip2; if( ( mydist & ip2 ) != 0 ) { partner = MModAdd( root, partner, size ); if( partner != root ) partner = MModAdd1( partner, size ); /* * This probing mechanism causes problems when lookahead is on. Too many * messages are exchanged in this virtual topology causing a hang on * some machines. It is currently disabled until a better understanding * is acquired.
*/ #if 0 ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { /* if panel is not here, return and keep testing */ if( go == 0 ) { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } } #endif if( lbuf > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( _M_BUFF_R2, _M_COUNT_R2, _M_TYPE_R2, partner, msgid, comm, &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); #endif } else /* Recv message of length zero to enable probe */ { if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->status[0] ); } } else if( partner < npm1 ) { partner = MModAdd( root, partner, size ); if( partner != root ) partner = MModAdd1( partner, size ); if( lbuf > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Ssend( _M_BUFF_S2, _M_COUNT_S2, _M_TYPE_S2, partner, msgid, comm ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); #endif } else /* Send message of length zero to enable probe */ { if( ierr == MPI_SUCCESS ) ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm ); } } } if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } } while( ip2 > 0 ); /* * Roll the pieces */ prev = MModSub1( rank, size ); if( MModSub1( prev, size ) == root ) prev = root; next = MModAdd1( rank, size ); if( rank == root ) next = MModAdd1( next, size ); for( k = 0; k < npm2; k++ ) { l = ( k >> 1 ); /* * Who is sending to who and how much */ if( ( ( mydist + k ) & 1 ) != 0 ) { ibufS = ( indx = MModAdd( mydist, l, npm1 ) ) * count; lbufS = ( indx == npm2 ?
COUNT : ibufS + count ); lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); ibufR = ( indx = MModSub( mydist, l+1, npm1 ) ) * count; lbufR = ( indx == npm2 ? COUNT : ibufR + count ); lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); partner = prev; } else { ibufS = ( indx = MModSub( mydist, l, npm1 ) ) * count; lbufS = ( indx == npm2 ? COUNT : ibufS + count ); lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); ibufR = ( indx = MModAdd( mydist, l+1, npm1 ) ) * count; lbufR = ( indx == npm2 ? COUNT : ibufR + count ); lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); partner = next; } /* * Exchange the messages */ if( lbufS > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, _M_ROLL_TYPE_S, partner, msgid, comm, &PANEL->request[0] ); } else { if( ierr == MPI_SUCCESS ) ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->request[0] ); } if( lbufR > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, _M_ROLL_TYPE_R, partner, msgid, comm, &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); #endif } else { if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->status[0] ); } if( ierr == MPI_SUCCESS ) ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); #endif } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occurred in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ?
HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_blonM ( HPL_T_panel * PANEL ) #else int HPL_bwait_blonM( PANEL ) HPL_T_panel * PANEL; #endif { /* * HPL_bwait_blonM has nothing to release for this variant: every path * below returns HPL_SUCCESS. * .. * .. Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } return( HPL_SUCCESS ); } hpcc-1.4.1/hpl/src/comm/HPL_blong.c0000644000000000000000000003127411256503657013617 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #define I_SEND 0 #define I_RECV 1 #ifdef HPL_STDC_HEADERS int HPL_binit_blong ( HPL_T_panel * PANEL ) #else int HPL_binit_blong( PANEL ) HPL_T_panel * PANEL; #endif { /* .. * .. Executable Statements .. 
*/ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #ifdef HPL_COPY_L /* * Copy the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif #else /* * Force the copy of the panel into a contiguous buffer */ HPL_copyL( PANEL ); #endif return( HPL_SUCCESS ); } #ifdef HPL_USE_MPI_DATATYPE #define _M_BUFF_S PANEL->buffers[I_SEND] #define _M_COUNT_S PANEL->counts[I_SEND] #define _M_TYPE_S PANEL->dtypes[I_SEND] #define _M_BUFF_R PANEL->buffers[I_RECV] #define _M_COUNT_R PANEL->counts[I_RECV] #define _M_TYPE_R PANEL->dtypes[I_RECV] #define _M_ROLL_BUFF_S PANEL->buffers[I_SEND] #define _M_ROLL_COUNT_S PANEL->counts[I_SEND] #define _M_ROLL_TYPE_S PANEL->dtypes[I_SEND] #define _M_ROLL_BUFF_R PANEL->buffers[I_RECV] #define _M_ROLL_COUNT_R PANEL->counts[I_RECV] #define _M_ROLL_TYPE_R PANEL->dtypes[I_RECV] #else #define _M_BUFF_S (void *)(PANEL->L2 + ibuf) #define _M_COUNT_S lbuf #define _M_TYPE_S MPI_DOUBLE #define _M_BUFF_R (void *)(PANEL->L2 + ibuf) #define _M_COUNT_R lbuf #define _M_TYPE_R MPI_DOUBLE #define _M_ROLL_BUFF_S (void *)(PANEL->L2 + ibufS) #define _M_ROLL_COUNT_S lbufS #define _M_ROLL_TYPE_S MPI_DOUBLE #define _M_ROLL_BUFF_R (void *)(PANEL->L2 + ibufR) #define _M_ROLL_COUNT_R lbufR #define _M_ROLL_TYPE_R MPI_DOUBLE #endif #ifdef HPL_STDC_HEADERS int HPL_bcast_blong ( HPL_T_panel * PANEL, int * IFLAG ) #else int HPL_bcast_blong( PANEL, IFLAG ) HPL_T_panel * PANEL; int * IFLAG; #endif { /* * .. Local Variables .. */ MPI_Comm comm; int COUNT, count, dummy=0, ierr=MPI_SUCCESS, ibuf, ibufR, ibufS, indx, ip2, k, l, lbuf, lbufR, lbufS, mask, msgid, mydist, mydist2, next, npm1, partner, prev, rank, root, size; /* .. * .. Executable Statements .. */ if( PANEL == NULL ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } if( ( size = PANEL->grid->npcol ) <= 1 ) { *IFLAG = HPL_SUCCESS; return( HPL_SUCCESS ); } /* * Cast phase: If I am the root process, start spreading the panel. 
If * I am not the root process, test for message receive completion. If * the message is there, then receive it, and keep spreading in a * blocking fashion this time. Otherwise, inform the caller that the * panel has still not been received. */ comm = PANEL->grid->row_comm; rank = PANEL->grid->mycol; mask = PANEL->grid->col_mask; ip2 = PANEL->grid->col_ip2m1; root = PANEL->pcol; msgid = PANEL->msgid; COUNT = PANEL->len; npm1 = size - 1; mydist2 = ( mydist = MModSub( rank, root, size ) ); indx = ip2; count = COUNT / size; count = Mmax( count, 1 ); /* * Spread the panel across process columns */ do { mask ^= ip2; if( ( mydist & mask ) == 0 ) { lbuf = COUNT - ( ibuf = indx * count ); if( indx + ip2 < size ) { l = ip2 * count; lbuf = Mmin( lbuf, l ); } partner = mydist ^ ip2; if( ( mydist & ip2 ) != 0 ) { partner = MModAdd( root, partner, size ); /* * This probing mechanism causes problems when lookhead is on. Too many * messages are exchanged in this virtual topology causing a hang on * some machines. It is currently disabled until a better understanding * is acquired. 
*/ #if 0 ierr = MPI_Iprobe( partner, msgid, comm, &go, &PANEL->status[0] ); if( ierr == MPI_SUCCESS ) { /* if panel is not here, return and keep testing */ if( go == 0 ) { *IFLAG = HPL_KEEP_TESTING; return( HPL_KEEP_TESTING ); } } #endif if( lbuf > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibuf, lbuf, I_RECV ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( _M_BUFF_R, _M_COUNT_R, _M_TYPE_R, partner, msgid, comm, &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); #endif } else /* Recv message of length zero to enable probe */ { if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->status[0] ); } } else if( partner < size ) { partner = MModAdd( root, partner, size ); if( lbuf > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibuf, lbuf, I_SEND ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Ssend( _M_BUFF_S, _M_COUNT_S, _M_TYPE_S, partner, msgid, comm ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); #endif } else /* Send message of length zero to enable probe */ { if( ierr == MPI_SUCCESS ) ierr = MPI_Ssend( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm ); } } } if( mydist2 < ip2 ) { ip2 >>= 1; indx -= ip2; } else { mydist2 -= ip2; ip2 >>= 1; indx += ip2; } } while( ip2 > 0 ); /* * Roll the pieces */ prev = MModSub1( rank, size ); next = MModAdd1( rank, size ); for( k = 0; k < npm1; k++ ) { l = ( k >> 1 ); /* * Who is sending to who and how much */ if( ( ( mydist + k ) & 1 ) != 0 ) { ibufS = ( indx = MModAdd( mydist, l, size ) ) * count; lbufS = ( indx == npm1 ? COUNT : ibufS + count ); lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); ibufR = ( indx = MModSub( mydist, l+1, size ) ) * count; lbufR = ( indx == npm1 ? 
COUNT : ibufR + count ); lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); partner = prev; } else { ibufS = ( indx = MModSub( mydist, l, size ) ) * count; lbufS = ( indx == npm1 ? COUNT : ibufS + count ); lbufS = Mmin( COUNT, lbufS ) - ibufS; lbufS = Mmax( 0, lbufS ); ibufR = ( indx = MModAdd( mydist, l+1, size ) ) * count; lbufR = ( indx == npm1 ? COUNT : ibufR + count ); lbufR = Mmin( COUNT, lbufR ) - ibufR; lbufR = Mmax( 0, lbufR ); partner = next; } /* * Exchange the messages */ if( lbufS > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibufS, lbufS, I_SEND ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Issend( _M_ROLL_BUFF_S, _M_ROLL_COUNT_S, _M_ROLL_TYPE_S, partner, msgid, comm, &PANEL->request[0] ); } else { if( ierr == MPI_SUCCESS ) ierr = MPI_Issend( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->request[0] ); } if( lbufR > 0 ) { #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = HPL_packL( PANEL, ibufR, lbufR, I_RECV ); #endif if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( _M_ROLL_BUFF_R, _M_ROLL_COUNT_R, _M_ROLL_TYPE_R, partner, msgid, comm, &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &PANEL->dtypes[I_RECV] ); #endif } else { if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(&dummy), 0, MPI_BYTE, partner, msgid, comm, &PANEL->status[0] ); } if( ierr == MPI_SUCCESS ) ierr = MPI_Wait ( &PANEL->request[0], &PANEL->status[0] ); #ifdef HPL_USE_MPI_DATATYPE if( ( lbufS > 0 ) && ( ierr == MPI_SUCCESS ) ) ierr = MPI_Type_free( &PANEL->dtypes[I_SEND] ); #endif } /* * If the message was received and being forwarded, return HPL_SUCCESS. * If an error occured in an MPI call, return HPL_FAILURE. */ *IFLAG = ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ); return( *IFLAG ); } #ifdef HPL_STDC_HEADERS int HPL_bwait_blong ( HPL_T_panel * PANEL ) #else int HPL_bwait_blong( PANEL ) HPL_T_panel * PANEL; #endif { /* .. * .. 
Executable Statements .. */ if( PANEL == NULL ) { return( HPL_SUCCESS ); } if( PANEL->grid->npcol <= 1 ) { return( HPL_SUCCESS ); } return( HPL_SUCCESS ); } hpcc-1.4.1/hpl/src/comm/HPL_bwait.c0000644000000000000000000001112311256503657013613 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_bwait ( HPL_T_panel * PANEL ) #else int HPL_bwait ( PANEL ) HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_bwait HPL_bwait waits for the row broadcast of the current panel to * terminate. Successful completion is indicated by the returned error * code HPL_SUCCESS. * * Arguments * ========= * * PANEL (input/output) HPL_T_panel * * On entry, PANEL points to the current panel data structure * being broadcast. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int ierr; HPL_T_TOP top; /* .. * .. Executable Statements .. 
*/ if( PANEL->grid->npcol <= 1 ) return( HPL_SUCCESS ); /* * Retrieve the selected virtual broadcast topology */ top = PANEL->algo->btopo; switch( top ) { case HPL_1RING_M : ierr = HPL_bwait_1rinM( PANEL ); break; case HPL_1RING : ierr = HPL_bwait_1ring( PANEL ); break; case HPL_2RING_M : ierr = HPL_bwait_2rinM( PANEL ); break; case HPL_2RING : ierr = HPL_bwait_2ring( PANEL ); break; case HPL_BLONG_M : ierr = HPL_bwait_blonM( PANEL ); break; case HPL_BLONG : ierr = HPL_bwait_blong( PANEL ); break; default : ierr = HPL_SUCCESS; } return( ierr ); /* * End of HPL_bwait */ } hpcc-1.4.1/hpl/src/comm/HPL_copyL.c0000644000000000000000000001076411256503657013605 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_copyL ( HPL_T_panel * PANEL ) #else void HPL_copyL ( PANEL ) HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_copyL copies the panel of columns, the L1 replicated submatrix, * the pivot array and the info scalar into a contiguous workspace for * later broadcast. * * The copy of this panel into a contiguous buffer can be enforced by * specifying -DHPL_COPY_L in the architecture specific Makefile. * * Arguments * ========= * * PANEL (input/output) HPL_T_panel * * On entry, PANEL points to the current panel data structure * being broadcast. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int jb, lda; /* .. * .. Executable Statements .. 
*/ if( PANEL->grid->mycol == PANEL->pcol ) { jb = PANEL->jb; lda = PANEL->lda; if( PANEL->grid->myrow == PANEL->prow ) { HPL_dlacpy( PANEL->mp-jb, jb, Mptr( PANEL->A, jb, -jb, lda ), lda, PANEL->L2, PANEL->ldl2 ); } else { HPL_dlacpy( PANEL->mp, jb, Mptr( PANEL->A, 0, -jb, lda ), lda, PANEL->L2, PANEL->ldl2 ); } } /* * End of HPL_copyL */ } hpcc-1.4.1/hpl/src/comm/HPL_packL.c0000644000000000000000000002132511256503657013544 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_packL ( HPL_T_panel * PANEL, const int INDEX, const int LEN, const int IBUF ) #else int HPL_packL ( PANEL, INDEX, LEN, IBUF ) HPL_T_panel * PANEL; const int INDEX; const int LEN; const int IBUF; #endif { /* * Purpose * ======= * * HPL_packL forms the MPI data type for the panel to be broadcast. * Successful completion is indicated by the returned error code * MPI_SUCCESS. * * Arguments * ========= * * PANEL (input/output) HPL_T_panel * * On entry, PANEL points to the current panel data structure * being broadcast. * * INDEX (input) const int * On entry, INDEX points to the first entry of the packed * buffer being broadcast. * * LEN (input) const int * On entry, LEN is the length of the packed buffer. * * IBUF (input) const int * On entry, IBUF specifies the panel buffer/count/type entries * that should be initialized. * * --------------------------------------------------------------------- */ #ifdef HPL_USE_MPI_DATATYPE /* * .. Local Variables .. */ #ifndef HPL_COPY_L MPI_Datatype * type = NULL; void * * * bufs = NULL; double * A; int * blen = NULL; MPI_Aint * disp = NULL; int curr, i, i1, ibuf, ierr=MPI_SUCCESS, j1, jb, jbm, jbp1, lda, len, m, m1, nbufs; #else int ierr; #endif /* .. * .. Executable Statements .. 
*/ #ifdef HPL_COPY_L /* * Panel + L1 + DPIV have been copied into a contiguous buffer - Create * and commit a contiguous data type */ PANEL->buffers[IBUF] = (void *)(PANEL->L2 + INDEX); PANEL->counts [IBUF] = 1; ierr = MPI_Type_contiguous( LEN, MPI_DOUBLE, &PANEL->dtypes[IBUF] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); return( ierr ); #else /* * Panel is not contiguous (because of LDA and also L1 + DPIV) - Create * and commit a struct data type */ jbp1 = ( jb = PANEL->jb ) + 1; /* * Temporaries to create the type struct. */ bufs = (void * * *)malloc( jbp1 * sizeof( void * * ) ); blen = (int *)malloc( jbp1 * sizeof( int ) ); disp = (MPI_Aint *)malloc( jbp1 * sizeof( MPI_Aint ) ); type = (MPI_Datatype *)malloc( jbp1 * sizeof( MPI_Datatype ) ); if( ( bufs != NULL ) && ( blen != NULL ) && ( disp != NULL ) && ( type != NULL ) ) { m = PANEL->mp; curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) m -= jb; len = LEN; ibuf = INDEX; nbufs = 0; jbm = jb * m; if( ( m > 0 ) && ( ibuf < jbm ) ) { /* * Retrieve proper pointers depending on process row and column */ if( PANEL->grid->mycol == PANEL->pcol ) { lda = PANEL->lda; if( curr != 0 ) { A = Mptr( PANEL->A, jb, -jb, lda ); } else { A = Mptr( PANEL->A, 0, -jb, lda ); } } else { lda = PANEL->ldl2; A = PANEL->L2; } /* * Pack the first (partial) column of L */ m1 = m - ( i1 = ibuf - ( j1 = ibuf / m ) * m ); m1 = Mmin( len, m1 ); bufs[nbufs] = (void **)(Mptr( A, i1, j1, lda )); type[nbufs] = MPI_DOUBLE; blen[nbufs] = m1; if( ierr == MPI_SUCCESS ) ierr = MPI_Address( bufs[nbufs], &disp[nbufs] ); nbufs++; len -= m1; j1++; ibuf += m1; /* * Pack the remaining columns of L */ while( ( len > 0 ) && ( j1 < jb ) ) { m1 = Mmin( len, m ); bufs[nbufs] = (void**)(Mptr( A, 0, j1, lda )); type[nbufs] = MPI_DOUBLE; blen[nbufs] = m1; if( ierr == MPI_SUCCESS ) ierr = MPI_Address( bufs[nbufs], &disp[nbufs] ); nbufs++; len -= m1; j1++; ibuf += m1; } } /* * Pack L1, DPIV, DINFO */ if( len > 0 ) { /* 
L1, DPIV, DINFO */ bufs[nbufs] = (void **)(PANEL->L1 + ibuf - jbm); type[nbufs] = MPI_DOUBLE; blen[nbufs] = len; if( ierr == MPI_SUCCESS ) ierr = MPI_Address( bufs[nbufs], &disp[nbufs] ); nbufs++; } for( i = 1; i < nbufs; i++ ) disp[i] -= disp[0]; disp[0] = 0; PANEL->buffers[IBUF] = (void ***)(bufs[0]); PANEL->counts [IBUF] = 1; /* * construct the struct type */ if( ierr == MPI_SUCCESS ) ierr = MPI_Type_struct( nbufs, blen, disp, type, &PANEL->dtypes[IBUF] ); /* * release temporaries */ if( bufs ) free( bufs ); if( blen ) free( blen ); if( disp ) free( disp ); if( type ) free( type ); /* * commit the type */ if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &PANEL->dtypes[IBUF] ); return( ierr ); } else { /* * Memory allocation failed -> abort */ HPL_pabort( __LINE__, "HPL_packL", "Memory allocation failed" ); return( MPI_SUCCESS ); /* never executed (hopefully ...) */ } #endif #else /* HPL_USE_MPI_DATATYPE not defined - Oops, there is a bug somewhere, so, just in case and until I find it ... */ return( MPI_SUCCESS ); #endif /* * End of HPL_packL */ } hpcc-1.4.1/hpl/src/comm/HPL_recv.c0000644000000000000000000001364011256503657013452 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Do not use MPI user-defined data types no matter what. This routine * is used for small contiguous messages. */ #ifdef HPL_USE_MPI_DATATYPE #undef HPL_USE_MPI_DATATYPE #endif #ifdef HPL_STDC_HEADERS int HPL_recv ( double * RBUF, int RCOUNT, int SRC, int RTAG, MPI_Comm COMM ) #else int HPL_recv ( RBUF, RCOUNT, SRC, RTAG, COMM ) double * RBUF; int RCOUNT; int SRC; int RTAG; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_recv is a simple wrapper around MPI_Recv. Its main purpose is * to allow for some experimentation / tuning of this simple routine. 
* Successful completion is indicated by the returned error code * HPL_SUCCESS. In the case of messages of length less than or equal to * zero, this function returns immediately. * * Arguments * ========= * * RBUF (local output) double * * On entry, RBUF specifies the starting address of buffer to be * received. * * RCOUNT (local input) int * On entry, RCOUNT specifies the number of double precision * entries in RBUF. RCOUNT must be at least zero. * * SRC (local input) int * On entry, SRC specifies the rank of the sending process in * the communication space defined by COMM. * * RTAG (local input) int * On entry, STAG specifies the message tag to be used for this * communication operation. * * COMM (local input) MPI_Comm * The MPI communicator identifying the communication space. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ MPI_Status status; #ifdef HPL_USE_MPI_DATATYPE MPI_Datatype type; #endif int ierr; /* .. * .. Executable Statements .. */ if( RCOUNT <= 0 ) return( HPL_SUCCESS ); #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type ); if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(RBUF), 1, type, SRC, RTAG, COMM, &status ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type ); #else ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, SRC, RTAG, COMM, &status ); #endif return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); /* * End of HPL_recv */ } hpcc-1.4.1/hpl/src/comm/HPL_sdrv.c0000644000000000000000000002173611256503657013476 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Do not use MPI user-defined data types no matter what. This routine * is used for small contiguous messages. */ #ifdef HPL_USE_MPI_DATATYPE #undef HPL_USE_MPI_DATATYPE #endif #ifdef HPL_STDC_HEADERS int HPL_sdrv ( double * SBUF, int SCOUNT, int STAG, double * RBUF, int RCOUNT, int RTAG, int PARTNER, MPI_Comm COMM ) #else int HPL_sdrv ( SBUF, SCOUNT, STAG, RBUF, RCOUNT, RTAG, PARTNER, COMM ) double * SBUF; int SCOUNT; int STAG; double * RBUF; int RCOUNT; int RTAG; int PARTNER; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is * to allow for some experimentation and tuning of this simple function. * Messages of length less than or equal to zero are not sent nor * received. Successful completion is indicated by the returned error * code HPL_SUCCESS. * * Arguments * ========= * * SBUF (local input) double * * On entry, SBUF specifies the starting address of buffer to be * sent. * * SCOUNT (local input) int * On entry, SCOUNT specifies the number of double precision * entries in SBUF. SCOUNT must be at least zero. * * STAG (local input) int * On entry, STAG specifies the message tag to be used for the * sending communication operation. * * RBUF (local output) double * * On entry, RBUF specifies the starting address of buffer to be * received. * * RCOUNT (local input) int * On entry, RCOUNT specifies the number of double precision * entries in RBUF. RCOUNT must be at least zero. * * RTAG (local input) int * On entry, RTAG specifies the message tag to be used for the * receiving communication operation. * * PARTNER (local input) int * On entry, PARTNER specifies the rank of the collaborative * process in the communication space defined by COMM. * * COMM (local input) MPI_Comm * The MPI communicator identifying the communication space. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #ifdef HPL_USE_MPI_DATATYPE MPI_Datatype type[2]; #endif MPI_Request request; MPI_Status status; int ierr; /* .. * .. Executable Statements .. */ if( RCOUNT > 0 ) { if( SCOUNT > 0 ) { #ifdef HPL_USE_MPI_DATATYPE /* * Post asynchronous receive */ ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[0] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Irecv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, COMM, &request ); /* * Blocking send */ if( ierr == MPI_SUCCESS ) ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type[1] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[1] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, COMM ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[1] ); /* * Wait for the receive to complete */ if( ierr == MPI_SUCCESS ) ierr = MPI_Wait( &request, &status ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[0] ); #else /* * Post asynchronous receive */ ierr = MPI_Irecv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, PARTNER, RTAG, COMM, &request ); /* * Blocking send */ if( ierr == MPI_SUCCESS ) ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, STAG, COMM ); /* * Wait for the receive to complete */ if( ierr == MPI_SUCCESS ) ierr = MPI_Wait( &request, &status ); #endif } else { /* * Blocking receive */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( RCOUNT, MPI_DOUBLE, &type[0] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[0] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Recv( (void *)(RBUF), 1, type[0], PARTNER, RTAG, COMM, &status ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[0] ); #else ierr = MPI_Recv( (void *)(RBUF), RCOUNT, MPI_DOUBLE, PARTNER, RTAG, COMM, &status ); #endif } } else if( SCOUNT > 0 ) { /* * Blocking send */ #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( SCOUNT, 
MPI_DOUBLE, &type[1] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[1] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Send( (void *)(SBUF), 1, type[1], PARTNER, STAG, COMM ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[1] ) ); #else ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, PARTNER, STAG, COMM ); #endif } else { ierr = MPI_SUCCESS; } return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); /* * End of HPL_sdrv */ } hpcc-1.4.1/hpl/src/comm/HPL_send.c0000644000000000000000000001347511256503657013452 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Do not use MPI user-defined data types no matter what. This routine * is used for small contiguous messages. */ #ifdef HPL_USE_MPI_DATATYPE #undef HPL_USE_MPI_DATATYPE #endif #ifdef HPL_STDC_HEADERS int HPL_send ( double * SBUF, int SCOUNT, int DEST, int STAG, MPI_Comm COMM ) #else int HPL_send ( SBUF, SCOUNT, DEST, STAG, COMM ) double * SBUF; int SCOUNT; int DEST; int STAG; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_send is a simple wrapper around MPI_Send. Its main purpose is * to allow for some experimentation / tuning of this simple routine. * Successful completion is indicated by the returned error code * MPI_SUCCESS. In the case of messages of length less than or equal to * zero, this function returns immediately. * * Arguments * ========= * * SBUF (local input) double * * On entry, SBUF specifies the starting address of buffer to be * sent. * * SCOUNT (local input) int * On entry, SCOUNT specifies the number of double precision * entries in SBUF. SCOUNT must be at least zero. 
* * DEST (local input) int * On entry, DEST specifies the rank of the receiving process in * the communication space defined by COMM. * * STAG (local input) int * On entry, STAG specifies the message tag to be used for this * communication operation. * * COMM (local input) MPI_Comm * The MPI communicator identifying the communication space. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #ifdef HPL_USE_MPI_DATATYPE MPI_Datatype type; #endif int ierr; /* .. * .. Executable Statements .. */ if( SCOUNT <= 0 ) return( HPL_SUCCESS ); #ifdef HPL_USE_MPI_DATATYPE ierr = MPI_Type_contiguous( SCOUNT, MPI_DOUBLE, &type ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type ); if( ierr == MPI_SUCCESS ) ierr = MPI_Send( (void *)(SBUF), 1, type, DEST, STAG, COMM ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type ); #else ierr = MPI_Send( (void *)(SBUF), SCOUNT, MPI_DOUBLE, DEST, STAG, COMM ); #endif return( ( ierr == MPI_SUCCESS ? HPL_SUCCESS : HPL_FAILURE ) ); /* * End of HPL_send */ } hpcc-1.4.1/hpl/src/grid/HPL_all_reduce.c0000644000000000000000000001206511256503657014604 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_all_reduce ( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const HPL_T_OP OP, MPI_Comm COMM ) #else int HPL_all_reduce ( BUFFER, COUNT, DTYPE, OP, COMM ) void * BUFFER; const int COUNT; const HPL_T_TYPE DTYPE; const HPL_T_OP OP; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_all_reduce performs a global reduce operation across all * processes of a group leaving the results on all processes. * * Arguments * ========= * * BUFFER (local input/global output) void * * On entry, BUFFER points to the buffer to be combined. 
On * exit, this array contains the combined data and is identical * on all processes in the group. * * COUNT (global input) const int * On entry, COUNT indicates the number of entries in BUFFER. * COUNT must be at least zero. * * DTYPE (global input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * OP (global input) const HPL_T_OP * On entry, OP is a pointer to the local combine function. * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int hplerr; /* .. * .. Executable Statements .. */ hplerr = HPL_reduce( BUFFER, COUNT, DTYPE, OP, 0, COMM ); if( hplerr != MPI_SUCCESS ) return( hplerr ); return( HPL_broadcast( BUFFER, COUNT, DTYPE, 0, COMM ) ); /* * End of HPL_all_reduce */ } hpcc-1.4.1/hpl/src/grid/HPL_barrier.c0000644000000000000000000000771611256503657014142 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. 
* * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_barrier ( MPI_Comm COMM ) #else int HPL_barrier ( COMM ) MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_barrier blocks the caller until all process members have call it. * The call returns at any process only after all group members have * entered the call. * * Arguments * ========= * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int i=0; /* .. * .. Executable Statements .. */ return( HPL_broadcast( (void*)(&i), 1, HPL_INT, 0, COMM ) ); /* * End of HPL_barrier */ } hpcc-1.4.1/hpl/src/grid/HPL_broadcast.c0000644000000000000000000001400111256503657014437 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_broadcast ( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const int ROOT, MPI_Comm COMM ) #else int HPL_broadcast ( BUFFER, COUNT, DTYPE, ROOT, COMM ) void * BUFFER; const int COUNT; const HPL_T_TYPE DTYPE; const int ROOT; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_broadcast broadcasts a message from the process with rank ROOT to * all processes in the group. * * Arguments * ========= * * BUFFER (local input/output) void * * On entry, BUFFER points to the buffer to be broadcast. On * exit, this array contains the broadcast data and is identical * on all processes in the group. * * COUNT (global input) const int * On entry, COUNT indicates the number of entries in BUFFER. * COUNT must be at least zero. * * DTYPE (global input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * ROOT (global input) const int * On entry, ROOT is the coordinate of the source process. * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int hplerr=MPI_SUCCESS, ip2=1, kk, mask=1, mpierr, mydist, partner, rank, size, tag = MSGID_BEGIN_COLL; MPI_Status status; /* .. * .. Executable Statements .. 
*/ if( COUNT <= 0 ) return( MPI_SUCCESS ); mpierr = MPI_Comm_size( COMM, &size ); if( size <= 1 ) return( mpierr ); mpierr = MPI_Comm_rank( COMM, &rank ); kk = size - 1; while( kk > 1 ) { kk >>= 1; ip2 <<= 1; mask <<= 1; mask++; } mydist = MModSub( rank, ROOT, size ); do { mask ^= ip2; if( ( mydist & mask ) == 0 ) { partner = mydist ^ ip2; if( mydist & ip2 ) { partner = MModAdd( ROOT, partner, size ); mpierr = MPI_Recv( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), partner, tag, COMM, &status ); } else if( partner < size ) { partner = MModAdd( ROOT, partner, size ); mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), partner, tag, COMM ); } if( mpierr != MPI_SUCCESS ) hplerr = mpierr; } ip2 >>= 1; } while( ip2 ); return( hplerr ); /* * End of HPL_broadcast */ } hpcc-1.4.1/hpl/src/grid/HPL_grid_exit.c0000644000000000000000000001125711256503657014465 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_grid_exit ( HPL_T_grid * GRID ) #else int HPL_grid_exit ( GRID ) HPL_T_grid * GRID; #endif { /* * Purpose * ======= * * HPL_grid_exit marks the process grid object for deallocation. The * returned error code MPI_SUCCESS indicates successful completion. * Other error codes are (MPI) implementation dependent. * * Arguments * ========= * * GRID (local input/output) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid to be released. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int hplerr = MPI_SUCCESS, mpierr; /* .. * .. Executable Statements .. 
*/ if( GRID->all_comm != MPI_COMM_NULL ) { mpierr = MPI_Comm_free( &(GRID->row_comm) ); if( mpierr != MPI_SUCCESS ) hplerr = mpierr; mpierr = MPI_Comm_free( &(GRID->col_comm) ); if( mpierr != MPI_SUCCESS ) hplerr = mpierr; mpierr = MPI_Comm_free( &(GRID->all_comm) ); if( mpierr != MPI_SUCCESS ) hplerr = mpierr; } GRID->order = HPL_COLUMN_MAJOR; GRID->iam = GRID->myrow = GRID->mycol = -1; GRID->nprow = GRID->npcol = GRID->nprocs = -1; GRID->row_ip2 = GRID->row_hdim = GRID->row_ip2m1 = GRID->row_mask = -1; GRID->col_ip2 = GRID->col_hdim = GRID->col_ip2m1 = GRID->col_mask = -1; return( hplerr ); /* * End of HPL_grid_exit */ } hpcc-1.4.1/hpl/src/grid/HPL_grid_info.c0000644000000000000000000001224511256503657014445 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_grid_info ( const HPL_T_grid * GRID, int * NPROW, int * NPCOL, int * MYROW, int * MYCOL ) #else int HPL_grid_info ( GRID, NPROW, NPCOL, MYROW, MYCOL ) const HPL_T_grid * GRID; int * NPROW; int * NPCOL; int * MYROW; int * MYCOL; #endif { /* * Purpose * ======= * * HPL_grid_info returns the grid shape and the coordinates in the grid * of the calling process. Successful completion is indicated by the * returned error code MPI_SUCCESS. Other error codes depend on the MPI * implementation. * * Arguments * ========= * * GRID (local input) const HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * NPROW (global output) int * * On exit, NPROW specifies the number of process rows in the * grid. NPROW is at least one. * * NPCOL (global output) int * * On exit, NPCOL specifies the number of process columns in * the grid. NPCOL is at least one. * * MYROW (global output) int * * On exit, MYROW specifies my row process coordinate in the * grid. MYROW is greater than or equal to zero and less than * NPROW. 
* * MYCOL (global output) int * * On exit, MYCOL specifies my column process coordinate in the * grid. MYCOL is greater than or equal to zero and less than * NPCOL. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ *NPROW = GRID->nprow; *NPCOL = GRID->npcol; *MYROW = GRID->myrow; *MYCOL = GRID->mycol; return( MPI_SUCCESS ); /* * End of HPL_grid_info */ } hpcc-1.4.1/hpl/src/grid/HPL_grid_init.c0000644000000000000000000001730611256503657014460 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_grid_init ( MPI_Comm COMM, const HPL_T_ORDER ORDER, const int NPROW, const int NPCOL, HPL_T_grid * GRID ) #else int HPL_grid_init ( COMM, ORDER, NPROW, NPCOL, GRID ) MPI_Comm COMM; const HPL_T_ORDER ORDER; const int NPROW; const int NPCOL; HPL_T_grid * GRID; #endif { /* * Purpose * ======= * * HPL_grid_init creates a NPROW x NPCOL process grid using column- or * row-major ordering from an initial collection of processes identified * by an MPI communicator. Successful completion is indicated by the * returned error code MPI_SUCCESS. Other error codes depend on the MPI * implementation. The coordinates of processes that are not part of the * grid are set to values outside of [0..NPROW) x [0..NPCOL). * * Arguments * ========= * * COMM (global/local input) MPI_Comm * On entry, COMM is the MPI communicator identifying the * initial collection of processes out of which the grid is * formed. 
* * ORDER (global input) const HPL_T_ORDER * On entry, ORDER specifies how the processes should be ordered * in the grid as follows: * ORDER = HPL_ROW_MAJOR row-major ordering; * ORDER = HPL_COLUMN_MAJOR column-major ordering; * * NPROW (global input) const int * On entry, NPROW specifies the number of process rows in the * grid to be created. NPROW must be at least one. * * NPCOL (global input) const int * On entry, NPCOL specifies the number of process columns in * the grid to be created. NPCOL must be at least one. * * GRID (local input/output) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information to be initialized. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int hdim, hplerr=MPI_SUCCESS, ierr, ip2, k, mask, mycol, myrow, nprocs, rank, size; /* .. * .. Executable Statements .. */ MPI_Comm_rank( COMM, &rank ); MPI_Comm_size( COMM, &size ); /* * Abort if illegal process grid */ nprocs = NPROW * NPCOL; if( ( nprocs > size ) || ( NPROW < 1 ) || ( NPCOL < 1 ) ) { HPL_pabort( __LINE__, "HPL_grid_init", "Illegal Grid" ); } /* * Row- or column-major ordering of the processes */ if( ORDER == HPL_ROW_MAJOR ) { GRID->order = HPL_ROW_MAJOR; myrow = rank / NPCOL; mycol = rank - myrow * NPCOL; } else { GRID->order = HPL_COLUMN_MAJOR; mycol = rank / NPROW; myrow = rank - mycol * NPROW; } GRID->iam = rank; GRID->myrow = myrow; GRID->mycol = mycol; GRID->nprow = NPROW; GRID->npcol = NPCOL; GRID->nprocs = nprocs; /* * row_ip2 : largest power of two <= nprow; * row_hdim : row_ip2 procs hypercube dim; * row_ip2m1 : largest power of two <= nprow-1; * row_mask : row_ip2m1 procs hypercube mask; */ hdim = 0; ip2 = 1; k = NPROW; while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } GRID->row_ip2 = ip2; GRID->row_hdim = hdim; mask = ip2 = 1; k = NPROW - 1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } GRID->row_ip2m1 = ip2; GRID->row_mask = mask; /* * col_ip2 : largest 
power of two <= npcol; * col_hdim : col_ip2 procs hypercube dim; * col_ip2m1 : largest power of two <= npcol-1; * col_mask : col_ip2m1 procs hypercube mask; */ hdim = 0; ip2 = 1; k = NPCOL; while( k > 1 ) { k >>= 1; ip2 <<= 1; hdim++; } GRID->col_ip2 = ip2; GRID->col_hdim = hdim; mask = ip2 = 1; k = NPCOL - 1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; } GRID->col_ip2m1 = ip2; GRID->col_mask = mask; /* * All communicator, leave if I am not part of this grid. Creation of the * row- and column communicators. */ ierr = MPI_Comm_split( COMM, ( rank < nprocs ? 0 : MPI_UNDEFINED ), rank, &(GRID->all_comm) ); if( GRID->all_comm == MPI_COMM_NULL ) return( ierr ); ierr = MPI_Comm_split( GRID->all_comm, myrow, mycol, &(GRID->row_comm) ); if( ierr != MPI_SUCCESS ) hplerr = ierr; ierr = MPI_Comm_split( GRID->all_comm, mycol, myrow, &(GRID->col_comm) ); if( ierr != MPI_SUCCESS ) hplerr = ierr; return( hplerr ); /* * End of HPL_grid_init */ } hpcc-1.4.1/hpl/src/grid/HPL_max.c0000644000000000000000000001161611256503657013273 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_max ( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE ) #else void HPL_max ( N, IN, INOUT, DTYPE ) const int N; const void * IN; void * INOUT; const HPL_T_TYPE DTYPE; #endif { /* * Purpose * ======= * * HPL_max combines (max) two buffers. * * * Arguments * ========= * * N (input) const int * On entry, N specifies the length of the buffers to be * combined. N must be at least zero. * * IN (input) const void * * On entry, IN points to the input-only buffer to be combined. * * INOUT (input/output) void * * On entry, INOUT points to the input-output buffer to be * combined. 
On exit, the entries of this array contains the * combined results. * * DTYPE (input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ register int i; /* .. * .. Executable Statements .. */ if( DTYPE == HPL_INT ) { const int * a = (const int *)(IN); int * b = (int *)(INOUT); for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); } else { const double * a = (const double *)(IN); double * b = (double *)(INOUT); for( i = 0; i < N; i++ ) b[i] = Mmax( a[i], b[i] ); } /* * End of HPL_max */ } hpcc-1.4.1/hpl/src/grid/HPL_min.c0000644000000000000000000001161611256503657013271 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_min ( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE ) #else void HPL_min ( N, IN, INOUT, DTYPE ) const int N; const void * IN; void * INOUT; const HPL_T_TYPE DTYPE; #endif { /* * Purpose * ======= * * HPL_min combines (min) two buffers. * * * Arguments * ========= * * N (input) const int * On entry, N specifies the length of the buffers to be * combined. N must be at least zero. * * IN (input) const void * * On entry, IN points to the input-only buffer to be combined. * * INOUT (input/output) void * * On entry, INOUT points to the input-output buffer to be * combined. On exit, the entries of this array contains the * combined results. * * DTYPE (input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ register int i; /* .. * .. Executable Statements .. 
*/ if( DTYPE == HPL_INT ) { const int * a = (const int *)(IN); int * b = (int *)(INOUT); for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); } else { const double * a = (const double *)(IN); double * b = (double *)(INOUT); for( i = 0; i < N; i++ ) b[i] = Mmin( a[i], b[i] ); } /* * End of HPL_min */ } hpcc-1.4.1/hpl/src/grid/HPL_pnum.c0000644000000000000000000001114111256503657013456 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_pnum ( const HPL_T_grid * GRID, const int MYROW, const int MYCOL ) #else int HPL_pnum ( GRID, MYROW, MYCOL ) const HPL_T_grid * GRID; const int MYROW; const int MYCOL; #endif { /* * Purpose * ======= * * HPL_pnum determines the rank of a process as a function of its * coordinates in the grid. * * Arguments * ========= * * GRID (local input) const HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * MYROW (local input) const int * On entry, MYROW specifies the row coordinate of the process * whose rank is to be determined. MYROW must be greater than or * equal to zero and less than NPROW. * * MYCOL (local input) const int * On entry, MYCOL specifies the column coordinate of the * process whose rank is to be determined. MYCOL must be greater * than or equal to zero and less than NPCOL. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ if( GRID->order == HPL_ROW_MAJOR ) return( MYROW * GRID->npcol + MYCOL ); else return( MYCOL * GRID->nprow + MYROW ); /* * End of HPL_pnum */ } hpcc-1.4.1/hpl/src/grid/HPL_reduce.c0000644000000000000000000001647511256503657013765 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_reduce ( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const HPL_T_OP OP, const int ROOT, MPI_Comm COMM ) #else int HPL_reduce ( BUFFER, COUNT, DTYPE, OP, ROOT, COMM ) void * BUFFER; const int COUNT; const HPL_T_TYPE DTYPE; const HPL_T_OP OP; const int ROOT; MPI_Comm COMM; #endif { /* * Purpose * ======= * * HPL_reduce performs a global reduce operation across all processes of * a group. Note that the input buffer is used as workarray and in all * processes but the accumulating process corrupting the original data. * * Arguments * ========= * * BUFFER (local input/output) void * * On entry, BUFFER points to the buffer to be reduced. On * exit, and in process of rank ROOT this array contains the * reduced data. This buffer is also used as workspace during * the operation in the other processes of the group. * * COUNT (global input) const int * On entry, COUNT indicates the number of entries in BUFFER. * COUNT must be at least zero. * * DTYPE (global input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * OP (global input) const HPL_T_OP * On entry, OP is a pointer to the local combine function. * * ROOT (global input) const int * On entry, ROOT is the coordinate of the accumulating process. * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ MPI_Status status; void * buffer = NULL; int hplerr=MPI_SUCCESS, d=1, i, ip2=1, mask=0, mpierr, mydist, partner, rank, size, tag = MSGID_BEGIN_COLL; /* .. * .. Executable Statements .. 
*/ if( COUNT <= 0 ) return( MPI_SUCCESS ); mpierr = MPI_Comm_size( COMM, &size ); if( size == 1 ) return( MPI_SUCCESS ); mpierr = MPI_Comm_rank( COMM, &rank ); i = size - 1; while( i > 1 ) { i >>= 1; d++; } if( DTYPE == HPL_INT ) buffer = (void *)( (int *) malloc( (size_t)(COUNT) * sizeof( int ) ) ); else buffer = (void *)( (double *)malloc( (size_t)(COUNT) * sizeof( double ) ) ); if( !( buffer ) ) { HPL_pabort( __LINE__, "HPL_reduce", "Memory allocation failed" ); } if( ( mydist = MModSub( rank, ROOT, size ) ) == 0 ) { do { mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), MModAdd( ROOT, ip2, size ), tag, COMM, &status ); if( mpierr != MPI_SUCCESS ) hplerr = mpierr; OP( COUNT, buffer, BUFFER, DTYPE ); ip2 <<= 1; d--; } while( d ); } else { do { if( ( mydist & mask ) == 0 ) { partner = mydist ^ ip2; if( mydist & ip2 ) { partner = MModAdd( ROOT, partner, size ); mpierr = MPI_Send( BUFFER, COUNT, HPL_2_MPI_TYPE( DTYPE ), partner, tag, COMM ); } else if( partner < size ) { partner = MModAdd( ROOT, partner, size ); mpierr = MPI_Recv( buffer, COUNT, HPL_2_MPI_TYPE( DTYPE ), partner, tag, COMM, &status ); OP( COUNT, buffer, BUFFER, DTYPE ); } if( mpierr != MPI_SUCCESS ) hplerr = mpierr; } mask ^= ip2; ip2 <<= 1; d--; } while( d ); } if( buffer ) free( buffer ); return( hplerr ); /* * End of HPL_reduce */ } hpcc-1.4.1/hpl/src/grid/HPL_sum.c0000644000000000000000000001156411256503657013314 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_sum ( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE ) #else void HPL_sum ( N, IN, INOUT, DTYPE ) const int N; const void * IN; void * INOUT; const HPL_T_TYPE DTYPE; #endif { /* * Purpose * ======= * * HPL_sum combines (sum) two buffers. * * * Arguments * ========= * * N (input) const int * On entry, N specifies the length of the buffers to be * combined. 
N must be at least zero. * * IN (input) const void * * On entry, IN points to the input-only buffer to be combined. * * INOUT (input/output) void * * On entry, INOUT points to the input-output buffer to be * combined. On exit, the entries of this array contains the * combined results. * * DTYPE (input) const HPL_T_TYPE * On entry, DTYPE specifies the type of the buffers operands. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ register int i; /* .. * .. Executable Statements .. */ if( DTYPE == HPL_INT ) { const int * a = (const int *)(IN); int * b = (int *)(INOUT); for( i = 0; i < N; i++ ) b[i] += a[i]; } else { const double * a = (const double *)(IN); double * b = (double *)(INOUT); for( i = 0; i < N; i++ ) b[i] += a[i]; } /* * End of HPL_sum */ } hpcc-1.4.1/hpl/src/panel/HPL_pdpanel_disp.c0000644000000000000000000001014711256503657015320 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_pdpanel_disp ( HPL_T_panel * * PANEL ) #else int HPL_pdpanel_disp ( PANEL ) HPL_T_panel * * PANEL; #endif { /* * Purpose * ======= * * HPL_pdpanel_disp deallocates the panel structure and resources and * stores the error code returned by the panel factorization. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * * On entry, PANEL points to the address of the panel data * structure to be deallocated. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int mpierr; /* .. * .. Executable Statements .. 
*/ /* * Deallocate the panel resources and panel structure */ mpierr = HPL_pdpanel_free( *PANEL ); if( *PANEL ) free( *PANEL ); *PANEL = NULL; return( mpierr ); /* * End of HPL_pdpanel_disp */ } hpcc-1.4.1/hpl/src/panel/HPL_pdpanel_free.c0000644000000000000000000001073211256503657015302 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_pdpanel_free ( HPL_T_panel * PANEL ) #else int HPL_pdpanel_free ( PANEL ) HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_pdpanel_free deallocates the panel resources and stores the error * code returned by the panel factorization. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the panel data structure from * which the resources should be deallocated. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. 
*/ if( PANEL->pmat->info == 0 ) PANEL->pmat->info = *(PANEL->DINFO); #ifdef HPL_CALL_VSIPL /* * Release the blocks */ (void) vsip_blockrelease_d( PANEL->L1block, VSIP_TRUE ); (void) vsip_blockrelease_d( PANEL->L2block, VSIP_TRUE ); if( PANEL->grid->nprow > 1 ) (void) vsip_blockrelease_d( PANEL->Ublock, VSIP_TRUE ); /* * Destroy blocks */ vsip_blockdestroy_d( PANEL->L1block ); vsip_blockdestroy_d( PANEL->L2block ); if( PANEL->grid->nprow > 1 ) vsip_blockdestroy_d( PANEL->Ublock ); #endif if( PANEL->WORK ) free( PANEL->WORK ); if( PANEL->IWORK ) free( PANEL->IWORK ); return( MPI_SUCCESS ); /* * End of HPL_pdpanel_free */ } hpcc-1.4.1/hpl/src/panel/HPL_pdpanel_init.c0000644000000000000000000003504311256503657015326 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_NO_MPI_DATATYPE /* The user insists to not use MPI types */ #ifndef HPL_COPY_L /* and also want to avoid the copy of L ... */ #define HPL_COPY_L /* well, sorry, can not do that: force the copy */ #endif #endif #ifdef HPL_STDC_HEADERS void HPL_pdpanel_init ( HPL_T_grid * GRID, HPL_T_palg * ALGO, const int M, const int N, const int JB, HPL_T_pmat * A, const int IA, const int JA, const int TAG, HPL_T_panel * PANEL ) #else void HPL_pdpanel_init ( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) HPL_T_grid * GRID; HPL_T_palg * ALGO; const int M; const int N; const int JB; HPL_T_pmat * A; const int IA; const int JA; const int TAG; HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_pdpanel_init initializes a panel data structure. * * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters. 
* * M (local input) const int * On entry, M specifies the global number of rows of the panel. * M must be at least zero. * * N (local input) const int * On entry, N specifies the global number of columns of the * panel and trailing submatrix. N must be at least zero. * * JB (global input) const int * On entry, JB specifies is the number of columns of the panel. * JB must be at least zero. * * A (local input/output) HPL_T_pmat * * On entry, A points to the data structure containing the local * array information. * * IA (global input) const int * On entry, IA is the global row index identifying the panel * and trailing submatrix. IA must be at least zero. * * JA (global input) const int * On entry, JA is the global column index identifying the panel * and trailing submatrix. JA must be at least zero. * * TAG (global input) const int * On entry, TAG is the row broadcast message id. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ size_t dalign; int icurcol, icurrow, ii, itmp1, jj, lwork, ml2, mp, mycol, myrow, nb, npcol, nprow, nq, nu; /* .. * .. Executable Statements .. */ PANEL->grid = GRID; /* ptr to the process grid */ PANEL->algo = ALGO; /* ptr to the algo parameters */ PANEL->pmat = A; /* ptr to the local array info */ myrow = GRID->myrow; mycol = GRID->mycol; nprow = GRID->nprow; npcol = GRID->npcol; nb = A->nb; HPL_infog2l( IA, JA, nb, nb, nb, nb, 0, 0, myrow, mycol, nprow, npcol, &ii, &jj, &icurrow, &icurcol ); mp = HPL_numrocI( M, IA, nb, nb, myrow, 0, nprow ); nq = HPL_numrocI( N, JA, nb, nb, mycol, 0, npcol ); /* ptr to trailing part of A */ PANEL->A = Mptr( (double *)(A->A), ii, jj, A->ld ); /* * Workspace pointers are initialized to NULL. 
*/ PANEL->WORK = NULL; PANEL->L2 = NULL; PANEL->L1 = NULL; PANEL->DPIV = NULL; PANEL->DINFO = NULL; PANEL->U = NULL; PANEL->IWORK = NULL; /* * Local lengths, indexes process coordinates */ PANEL->nb = nb; /* distribution blocking factor */ PANEL->jb = JB; /* panel width */ PANEL->m = M; /* global # of rows of trailing part of A */ PANEL->n = N; /* global # of cols of trailing part of A */ PANEL->ia = IA; /* global row index of trailing part of A */ PANEL->ja = JA; /* global col index of trailing part of A */ PANEL->mp = mp; /* local # of rows of trailing part of A */ PANEL->nq = nq; /* local # of cols of trailing part of A */ PANEL->ii = ii; /* local row index of trailing part of A */ PANEL->jj = jj; /* local col index of trailing part of A */ PANEL->lda = A->ld; /* local leading dim of array A */ PANEL->prow = icurrow; /* proc row owning 1st row of trailing A */ PANEL->pcol = icurcol; /* proc col owning 1st col of trailing A */ PANEL->msgid = TAG; /* message id to be used for panel bcast */ /* * Initialize ldl2 and len to temporary dummy values and Update tag for * next panel */ PANEL->ldl2 = 0; /* local leading dim of array L2 */ PANEL->len = 0; /* length of the buffer to broadcast */ /* * Figure out the exact amount of workspace needed by the factorization * and the update - Allocate that space - Finish the panel data structu- * re initialization. * * L1: JB x JB in all processes * DPIV: JB in all processes * DINFO: 1 in all processes * * We make sure that those three arrays are contiguous in memory for the * later panel broadcast. We also choose to put this amount of space * right after L2 (when it exist) so that one can receive a contiguous * buffer. 
*/ dalign = ALGO->align * sizeof( double ); if( npcol == 1 ) /* P x 1 process grid */ { /* space for L1, DPIV, DINFO */ lwork = ALGO->align + ( PANEL->len = JB * JB + JB + 1 ); if( nprow > 1 ) /* space for U */ { nu = nq - JB; lwork += JB * Mmax( 0, nu ); } if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * sizeof( double ) ) ) ) { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } /* * Initialize the pointers of the panel structure - Always re-use A in * the only process column */ PANEL->L2 = PANEL->A + ( myrow == icurrow ? JB : 0 ); PANEL->ldl2 = A->ld; PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); PANEL->DPIV = PANEL->L1 + JB * JB; PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1: NULL ); } else { /* space for L2, L1, DPIV */ ml2 = ( myrow == icurrow ? mp - JB : mp ); ml2 = Mmax( 0, ml2 ); PANEL->len = ml2*JB + ( itmp1 = JB*JB + JB + 1 ); #ifdef HPL_COPY_L lwork = ALGO->align + PANEL->len; #else lwork = ALGO->align + ( mycol == icurcol ? itmp1 : PANEL->len ); #endif if( nprow > 1 ) /* space for U */ { nu = ( mycol == icurcol ? nq - JB : nq ); lwork += JB * Mmax( 0, nu ); } if( !( PANEL->WORK = (void *)malloc( (size_t)(lwork) * sizeof( double ) ) ) ) { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } /* * Initialize the pointers of the panel structure - Re-use A in the cur- * rent process column when HPL_COPY_L is not defined. */ #ifdef HPL_COPY_L PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); PANEL->ldl2 = Mmax( 1, ml2 ); PANEL->L1 = PANEL->L2 + ml2 * JB; #else if( mycol == icurcol ) { PANEL->L2 = PANEL->A + ( myrow == icurrow ? 
JB : 0 ); PANEL->ldl2 = A->ld; PANEL->L1 = (double *)HPL_PTR( PANEL->WORK, dalign ); } else { PANEL->L2 = (double *)HPL_PTR( PANEL->WORK, dalign ); PANEL->ldl2 = Mmax( 1, ml2 ); PANEL->L1 = PANEL->L2 + ml2 * JB; } #endif PANEL->DPIV = PANEL->L1 + JB * JB; PANEL->DINFO = PANEL->DPIV + JB; *(PANEL->DINFO) = 0.0; PANEL->U = ( nprow > 1 ? PANEL->DINFO + 1 : NULL ); } #ifdef HPL_CALL_VSIPL PANEL->Ablock = A->block; /* * Create blocks and bind them to the data pointers */ PANEL->L1block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L1), (vsip_length)(JB*JB), VSIP_MEM_NONE ); PANEL->L2block = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->L2), (vsip_length)(PANEL->ldl2*JB), VSIP_MEM_NONE ); if( nprow > 1 ) { nu = ( mycol == icurcol ? nq - JB : nq ); PANEL->Ublock = vsip_blockbind_d( (vsip_scalar_d *)(PANEL->U), (vsip_length)(JB * Mmax( 0, nu )), VSIP_MEM_NONE ); } else { PANEL->Ublock = A->block; } #endif /* * If nprow is 1, we just allocate an array of JB integers for the swap. * When nprow > 1, we allocate the space for the index arrays immediate- * ly. The exact size of this array depends on the swapping routine that * will be used, so we allocate the maximum: * * IWORK[0] is of size at most 1 + * IPL is of size at most 1 + * IPID is of size at most 4 * JB + * * For HPL_pdlaswp00: * lindxA is of size at most 2 * JB + * lindxAU is of size at most 2 * JB + * llen is of size at most NPROW + * llen_sv is of size at most NPROW. * * For HPL_pdlaswp01: * ipA is of size ar most 1 + * lindxA is of size at most 2 * JB + * lindxAU is of size at most 2 * JB + * iplen is of size at most NPROW + 1 + * ipmap is of size at most NPROW + * ipmapm1 is of size at most NPROW + * permU is of size at most JB + * iwork is of size at most MAX( 2*JB, NPROW+1 ). * * that is 3 + 8*JB + MAX(2*NPROW, 3*NPROW+1+JB+MAX(2*JB,NPROW+1)) * = 4 + 9*JB + 3*NPROW + MAX( 2*JB, NPROW+1 ). 
* * We use the fist entry of this to work array to indicate whether the * the local index arrays have already been computed, and if yes, by * which function: * IWORK[0] = -1: no index arrays have been computed so far; * IWORK[0] = 0: HPL_pdlaswp00 already computed those arrays; * IWORK[0] = 1: HPL_pdlaswp01 already computed those arrays; * This allows to save some redundant and useless computations. */ if( nprow == 1 ) { lwork = JB; } else { itmp1 = (JB << 1); lwork = nprow + 1; itmp1 = Mmax( itmp1, lwork ); lwork = 4 + (9 * JB) + (3 * nprow) + itmp1; } PANEL->IWORK = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); if( PANEL->IWORK == NULL ) { HPL_pabort( __LINE__, "HPL_pdpanel_init", "Memory allocation failed" ); } /* Initialize the first entry of the workarray */ *(PANEL->IWORK) = -1; /* * End of HPL_pdpanel_init */ } hpcc-1.4.1/hpl/src/panel/HPL_pdpanel_new.c0000644000000000000000000001460011256503657015150 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdpanel_new ( HPL_T_grid * GRID, HPL_T_palg * ALGO, const int M, const int N, const int JB, HPL_T_pmat * A, const int IA, const int JA, const int TAG, HPL_T_panel * * PANEL ) #else void HPL_pdpanel_new ( GRID, ALGO, M, N, JB, A, IA, JA, TAG, PANEL ) HPL_T_grid * GRID; HPL_T_palg * ALGO; const int M; const int N; const int JB; HPL_T_pmat * A; const int IA; const int JA; const int TAG; HPL_T_panel * * PANEL; #endif { /* * Purpose * ======= * * HPL_pdpanel_new creates and initializes a panel data structure. * * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters. 
* * M (local input) const int * On entry, M specifies the global number of rows of the panel. * M must be at least zero. * * N (local input) const int * On entry, N specifies the global number of columns of the * panel and trailing submatrix. N must be at least zero. * * JB (global input) const int * On entry, JB specifies is the number of columns of the panel. * JB must be at least zero. * * A (local input/output) HPL_T_pmat * * On entry, A points to the data structure containing the local * array information. * * IA (global input) const int * On entry, IA is the global row index identifying the panel * and trailing submatrix. IA must be at least zero. * * JA (global input) const int * On entry, JA is the global column index identifying the panel * and trailing submatrix. JA must be at least zero. * * TAG (global input) const int * On entry, TAG is the row broadcast message id. * * PANEL (local input/output) HPL_T_panel * * * On entry, PANEL points to the address of the panel data * structure to create and initialize. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ HPL_T_panel * p = NULL; /* .. * .. Executable Statements .. */ /* * Allocate the panel structure - Check for enough memory */ if( !( p = (HPL_T_panel *)malloc( sizeof( HPL_T_panel ) ) ) ) { HPL_pabort( __LINE__, "HPL_pdpanel_new", "Memory allocation failed" ); } HPL_pdpanel_init( GRID, ALGO, M, N, JB, A, IA, JA, TAG, p ); *PANEL = p; /* * End of HPL_pdpanel_new */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp00N.c0000644000000000000000000002042211256503657014626 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP00N_DEPTH #define HPL_LASWP00N_DEPTH 32 #define HPL_LASWP00N_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp00N ( const int M, const int N, double * A, const int LDA, const int * IPIV ) #else void HPL_dlaswp00N ( M, N, A, LDA, IPIV ) const int M; const int N; double * A; const int LDA; const int * IPIV; #endif { /* * Purpose * ======= * * HPL_dlaswp00N performs a series of local row interchanges on a matrix * A. One row interchange is initiated for rows 0 through M-1 of A. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of the array A to be * interchanged. M must be at least zero. * * N (local input) const int * On entry, N specifies the number of columns of the array A. * N must be at least zero. * * A (local input/output) double * * On entry, A points to an array of dimension (LDA,N) to which * the row interchanges will be applied. On exit, the permuted * matrix. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * IPIV (local input) const int * * On entry, IPIV is an array of size M that contains the * pivoting information. For k in [0..M), IPIV[k]=IROFF + l * implies that local rows k and l are to be interchanged. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ register double r; double * a0, * a1; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP00N_LOG2_DEPTH ); int ip, nr, nu; register int i, j; /* .. * .. Executable Statements .. 
*/ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP00N_LOG2_DEPTH ) << HPL_LASWP00N_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP00N_DEPTH, A += incA ) { for( i = 0; i < M; i++ ) { if( i != ( ip = IPIV[i] ) ) { a0 = A + i; a1 = A + ip; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #if ( HPL_LASWP00N_DEPTH > 1 ) r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #endif #if ( HPL_LASWP00N_DEPTH > 2 ) r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #endif #if ( HPL_LASWP00N_DEPTH > 4 ) r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #endif #if ( HPL_LASWP00N_DEPTH > 8 ) r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #endif #if ( HPL_LASWP00N_DEPTH > 16 ) r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += 
LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; r = *a0; *a0 = *a1; *a1 = r; a0 += LDA; a1 += LDA; #endif } } } if( nr > 0 ) { for( i = 0; i < M; i++ ) { if( i != ( ip = IPIV[i] ) ) { a0 = A + i; a1 = A + ip; for( j = 0; j < nr; j++, a0 += LDA, a1 += LDA ) { r = *a0; *a0 = *a1; *a1 = r; } } } } /* * End of HPL_dlaswp00N */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp01N.c0000644000000000000000000002237711256503657014642 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP01N_DEPTH #define HPL_LASWP01N_DEPTH 32 #define HPL_LASWP01N_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp01N ( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA, const int * LINDXAU ) #else void HPL_dlaswp01N ( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) const int M; const int N; double * A; const int LDA; double * U; const int LDU; const int * LINDXA; const int * LINDXAU; #endif { /* * Purpose * ======= * * HPL_dlaswp01N copies scattered rows of A into itself and into an * array U. The row offsets in A of the source rows are specified by * LINDXA. The destination of those rows are specified by LINDXAU. A * positive value of LINDXAU indicates that the array destination is U, * and A otherwise. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of A that should be * moved within A or copied into U. M must be at least zero. * * N (local input) const int * On entry, N specifies the length of rows of A that should be * moved within A or copied into U. N must be at least zero. * * A (local input/output) double * * On entry, A points to an array of dimension (LDA,N). The rows * of this array specified by LINDXA should be moved within A or * copied into U. 
* * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * U (local input/output) double * * On entry, U points to an array of dimension (LDU,N). The rows * of A specified by LINDXA are be copied within this array U at * the positions indicated by positive values of LINDXAU. * * LDU (local input) const int * On entry, LDU specifies the leading dimension of the array U. * LDU must be at least MAX(1,M). * * LINDXA (local input) const int * * On entry, LINDXA is an array of dimension M that contains the * local row indexes of A that should be moved within A or * or copied into U. * * LINDXAU (local input) const int * * On entry, LINDXAU is an array of dimension M that contains * the local row indexes of U where the rows of A should be * copied at. This array also contains the local row offsets in * A where some of the rows of A should be moved to. A positive * value of LINDXAU[i] indicates that the row LINDXA[i] of A * should be copied into U at the position LINDXAU[i]; otherwise * the row LINDXA[i] of A should be moved at the position * -LINDXAU[i] within A. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * a0, * a1; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP01N_LOG2_DEPTH ), incU = (int)( (unsigned int)(LDU) << HPL_LASWP01N_LOG2_DEPTH ); int lda1, nu, nr; register int i, j; /* .. * .. Executable Statements .. 
*/ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01N_LOG2_DEPTH ) << HPL_LASWP01N_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP01N_DEPTH, A += incA, U += incU ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } *a1 = *a0; a1 += lda1; a0 += LDA; #if ( HPL_LASWP01N_DEPTH > 1 ) *a1 = *a0; a1 += lda1; a0 += LDA; #endif #if ( HPL_LASWP01N_DEPTH > 2 ) *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; #endif #if ( HPL_LASWP01N_DEPTH > 4 ) *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; #endif #if ( HPL_LASWP01N_DEPTH > 8 ) *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; #endif #if ( HPL_LASWP01N_DEPTH > 16 ) *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; *a1 = *a0; a1 += lda1; a0 += LDA; #endif } } if( nr ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]); lda1 = LDU; } else { a1 = A - (size_t)(LINDXAU[i]); lda1 = LDA; } for( j = 0; j < nr; j++, a1 += lda1, a0 += LDA ) { *a1 = *a0; } } } /* * End of HPL_dlaswp01N */ } 
hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp01T.c0000644000000000000000000002515011256503657014640 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP01T_DEPTH #define HPL_LASWP01T_DEPTH 32 #define HPL_LASWP01T_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp01T ( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA, const int * LINDXAU ) #else void HPL_dlaswp01T ( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) const int M; const int N; double * A; const int LDA; double * U; const int LDU; const int * LINDXA; const int * LINDXAU; #endif { /* * Purpose * ======= * * HPL_dlaswp01T copies scattered rows of A into itself and into an * array U. The row offsets in A of the source rows are specified by * LINDXA. The destination of those rows are specified by LINDXAU. A * positive value of LINDXAU indicates that the array destination is U, * and A otherwise. Rows of A are stored as columns in U. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of A that should be * moved within A or copied into U. M must be at least zero. * * N (local input) const int * On entry, N specifies the length of rows of A that should be * moved within A or copied into U. N must be at least zero. * * A (local input/output) double * * On entry, A points to an array of dimension (LDA,N). 
The rows * of this array specified by LINDXA should be moved within A or * copied into U. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * U (local input/output) double * * On entry, U points to an array of dimension (LDU,M). The rows * of A specified by LINDXA are copied within this array U at * the positions indicated by positive values of LINDXAU. The * rows of A are stored as columns in U. * * LDU (local input) const int * On entry, LDU specifies the leading dimension of the array U. * LDU must be at least MAX(1,N). * * LINDXA (local input) const int * * On entry, LINDXA is an array of dimension M that contains the * local row indexes of A that should be moved within A or * or copied into U. * * LINDXAU (local input) const int * * On entry, LINDXAU is an array of dimension M that contains * the local row indexes of U where the rows of A should be * copied at. This array also contains the local row offsets in * A where some of the rows of A should be moved to. A positive * value of LINDXAU[i] indicates that the row LINDXA[i] of A * should be copied into U at the position LINDXAU[i]; otherwise * the row LINDXA[i] of A should be moved at the position * -LINDXAU[i] within A. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * a0, * a1; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP01T_LOG2_DEPTH ), incU = ( 1 << HPL_LASWP01T_LOG2_DEPTH ); int nu, nr; register int i, j; /* .. * .. Executable Statements .. 
*/ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP01T_LOG2_DEPTH ) << HPL_LASWP01T_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP01T_DEPTH, A += incA, U += incU ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); a1[ 0] = *a0; a0 += LDA; #if ( HPL_LASWP01T_DEPTH > 1 ) a1[ 1] = *a0; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 2 ) a1[ 2] = *a0; a0 += LDA; a1[ 3] = *a0; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 4 ) a1[ 4] = *a0; a0 += LDA; a1[ 5] = *a0; a0 += LDA; a1[ 6] = *a0; a0 += LDA; a1[ 7] = *a0; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 8 ) a1[ 8] = *a0; a0 += LDA; a1[ 9] = *a0; a0 += LDA; a1[10] = *a0; a0 += LDA; a1[11] = *a0; a0 += LDA; a1[12] = *a0; a0 += LDA; a1[13] = *a0; a0 += LDA; a1[14] = *a0; a0 += LDA; a1[15] = *a0; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 16 ) a1[16] = *a0; a0 += LDA; a1[17] = *a0; a0 += LDA; a1[18] = *a0; a0 += LDA; a1[19] = *a0; a0 += LDA; a1[20] = *a0; a0 += LDA; a1[21] = *a0; a0 += LDA; a1[22] = *a0; a0 += LDA; a1[23] = *a0; a0 += LDA; a1[24] = *a0; a0 += LDA; a1[25] = *a0; a0 += LDA; a1[26] = *a0; a0 += LDA; a1[27] = *a0; a0 += LDA; a1[28] = *a0; a0 += LDA; a1[29] = *a0; a0 += LDA; a1[30] = *a0; a0 += LDA; a1[31] = *a0; a0 += LDA; #endif } else { a1 = A - (size_t)(LINDXAU[i]); *a1 = *a0; a1 += LDA; a0 += LDA; #if ( HPL_LASWP01T_DEPTH > 1 ) *a1 = *a0; a1 += LDA; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 2 ) *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 4 ) *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 8 ) *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += 
LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; #endif #if ( HPL_LASWP01T_DEPTH > 16 ) *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; *a1 = *a0; a1 += LDA; a0 += LDA; #endif } } } if( nr > 0 ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); if( LINDXAU[i] >= 0 ) { a1 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU); for( j = 0; j < nr; j++, a0 += LDA ) { a1[j] = *a0; } } else { a1 = A - (size_t)(LINDXAU[i]); for( j = 0; j < nr; j++, a1 += LDA, a0 += LDA ) { *a1 = *a0; } } } } /* * End of HPL_dlaswp01T */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp02N.c0000644000000000000000000002056711256503657014642 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP02N_DEPTH
#define    HPL_LASWP02N_DEPTH       32
#define    HPL_LASWP02N_LOG2_DEPTH   5
#endif
/*
 * The column blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP02N_DEPTH != ( 1 << HPL_LASWP02N_LOG2_DEPTH ) )
#error "HPL_LASWP02N_DEPTH must be equal to 2^HPL_LASWP02N_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp02N
(
   const int                        M,
   const int                        N,
   const double *                   A,
   const int                        LDA,
   double *                         W0,
   double *                         W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp02N
( M, N, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   const double *                   A;
   const int                        LDA;
   double *                         W0;
   double *                         W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp02N packs scattered rows of an array A into workspace W.
 * The row offsets in A are specified by LINDXA.  For every packed row
 * i, the value LINDXAU[i] is also stored (as a double) in W0[i*LDW].
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of A that should be
 *         copied into W. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N  specifies the length of rows of A that should be
 *         copied into W. N must be at least zero.
 *
 * A       (local input)                 const double *
 *         On entry, A points to an array of dimension (LDA,N). The rows
 *         of this array specified by LINDXA should be copied into W.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * W0      (local input/output)          double *
 *         On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied, i.e. W0[i*LDW] = LINDXAU[i] for i in [0..M).
 *
 * W       (local output)                double *
 *         On entry, W  is an array of size (LDW,M). On exit, W contains
 *         the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
 *         in W(:,i).
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA  is an array of dimension M that contains the
 *         local row indexes of A that should be copied into W.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M  that  contains
 *         the local  row indexes of  U that should be copied into A and
 *         replaced by the rows of W.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * A0 = A, * a0;
   double                     * w0;
/*
 * The stride of one column block is kept in size_t:  LDA * DEPTH does
 * not fit in an int as soon as LDA >= 2^31 / DEPTH.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP02N_LOG2_DEPTH;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * Record the destination of every packed row in the first entry of the
 * corresponding column of the workspace.
 */
   for( i = 0; i < M; i++ )
      *( W0 + (size_t)(i) * (size_t)(LDW) ) = (double)(LINDXAU[i]);
/*
 * nu columns are processed by blocks of HPL_LASWP02N_DEPTH columns, the
 * nr = N - nu remaining ones by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP02N_LOG2_DEPTH )
               << HPL_LASWP02N_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu;
        j += HPL_LASWP02N_DEPTH, A0 += incA, W += HPL_LASWP02N_DEPTH )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A0 + (size_t)(LINDXA[i]);
         w0 = W  + (size_t)(i) * (size_t)(LDW);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP02N_DEPTH; k++, a0 += LDA )
         { w0[k] = *a0; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A0 + (size_t)(LINDXA[i]);
         w0 = W  + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, a0 += LDA ) { w0[j] = *a0; }
      }
   }
/*
 * End of HPL_dlaswp02N
 */
}
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP03N_DEPTH
#define    HPL_LASWP03N_DEPTH       32
#define    HPL_LASWP03N_LOG2_DEPTH   5
#endif
/*
 * The column blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP03N_DEPTH != ( 1 << HPL_LASWP03N_LOG2_DEPTH ) )
#error "HPL_LASWP03N_DEPTH must be equal to 2^HPL_LASWP03N_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp03N
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03N
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03N copies columns of  W  into  rows  of an  array U.  The
 * destination in U of these columns contained in W is stored within W0.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies  the  number  of columns of  W  stored
 *         contiguously that should be copied into U. M must be at least
 *         zero.
 *
 * N       (local input)                 const int
 *         On entry,  N  specifies  the  length of columns of  W  stored
 *         contiguously that should be copied into U. N must be at least
 *         zero.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,N).  Columns
 *         of W are copied as rows within this array U at the  positions
 *         specified in W0.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.
 *
 * W       (local input)                 const double *
 *         On entry, W  is an array of size (LDW,M),  that contains data
 *         to be copied into U. For i in [0..M),  entries W(:,i)  should
 *         be copied into the row W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * u0;
/*
 * The stride of one column block is kept in size_t:  LDU * DEPTH does
 * not fit in an int as soon as LDU >= 2^31 / DEPTH.
 */
   const size_t               incU = (size_t)(LDU) <<
                                     HPL_LASWP03N_LOG2_DEPTH;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu columns are processed by blocks of HPL_LASWP03N_DEPTH columns, the
 * nr = N - nu remaining ones by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03N_LOG2_DEPTH )
               << HPL_LASWP03N_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu;
        j += HPL_LASWP03N_DEPTH, U += incU, w += HPL_LASWP03N_DEPTH )
   {
      for( i = 0; i < M; i++ )
      {
/*
 * The destination row index is stored as a double in W0[i*LDW].
 */
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) ));
         w0 = w + (size_t)(i) * (size_t)(LDW);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP03N_DEPTH; k++, u0 += LDU )
         { *u0 = w0[k]; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) ));
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp03N
 */
}
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP03T_DEPTH
#define    HPL_LASWP03T_DEPTH       32
#define    HPL_LASWP03T_LOG2_DEPTH   5
#endif
/*
 * The blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP03T_DEPTH != ( 1 << HPL_LASWP03T_LOG2_DEPTH ) )
#error "HPL_LASWP03T_DEPTH must be equal to 2^HPL_LASWP03T_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp03T
(
   const int                        M,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const double *                   W0,
   const double *                   W,
   const int                        LDW
)
#else
void HPL_dlaswp03T
( M, N, U, LDU, W0, W, LDW )
   const int                        M;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp03T copies  columns  of  W  into  an array  U.  The
 * destination in U of these columns contained in W is stored within W0.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies  the  number  of columns of  W  stored
 *         contiguously that should be copied into U. M must be at least
 *         zero.
 *
 * N       (local input)                 const int
 *         On entry,  N  specifies  the  length of columns of  W  stored
 *         contiguously that should be copied into U. N must be at least
 *         zero.
 *
 * U       (local input/output)          double *
 *         On entry, U points to an array of dimension (LDU,M).  Columns
 *         of W are copied within the array U at the positions specified
 *         in W0.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M-1)*LDW+1, that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.
 *
 * W       (local input)                 const double *
 *         On entry, W  is an array of size (LDW,M),  that contains data
 *         to be copied into U. For i in [0..M),  entries W(:,i)  should
 *         be copied into the column W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * u0;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu entries of every column are copied by chunks of HPL_LASWP03T_DEPTH
 * entries, the nr = N - nu remaining ones by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP03T_LOG2_DEPTH )
               << HPL_LASWP03T_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu; j += HPL_LASWP03T_DEPTH,
        U += HPL_LASWP03T_DEPTH, w += HPL_LASWP03T_DEPTH )
   {
      for( i = 0; i < M; i++ )
      {
/*
 * The destination column index is stored as a double in W0[i*LDW].
 */
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )) *
                  (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP03T_DEPTH; k++ ) { u0[k] = w0[k]; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )) *
                  (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp03T
 */
}
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP04N_DEPTH
#define    HPL_LASWP04N_DEPTH       32
#define    HPL_LASWP04N_LOG2_DEPTH   5
#endif
/*
 * The column blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP04N_DEPTH != ( 1 << HPL_LASWP04N_LOG2_DEPTH ) )
#error "HPL_LASWP04N_DEPTH must be equal to 2^HPL_LASWP04N_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp04N
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04N
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U
 * with columns of W. In addition M1 - M0 columns of W  are  copied into
 * rows of U.
 *
 * Arguments
 * =========
 *
 * M0      (local input)                 const int
 *         On entry, M0 specifies the number of rows of U that should be
 *         copied into  A  and replaced by columns of  W.  M0 must be at
 *         least zero.
 *
 * M1      (local input)                 const int
 *         On entry, M1 is the total number of columns of W used by this
 *         routine:  columns [0..M0) replace the rows of U moved into A,
 *         columns [M0..M1) are copied into rows of U.  M1  must  be  at
 *         least zero.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the rows of U that should
 *         be copied into A. N must be at least zero.
 *
 * U       (local input/output)          double *
 *         On entry,  U  points to an array of dimension (LDU,N).   This
 *         array contains the rows that are to be copied into A.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M1).
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the  rows of this array specified by  LINDXA  are replaced by
 *         rows of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M0).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M1-1)*LDW+1 that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.  Only the entries W0(i*LDW) for i in [M0..M1) are
 *         read.
 *
 * W       (local input)                 const double *
 *         On entry,  W  is an array of size (LDW,M1), that contains data
 *         to be copied into U.  For i in [0..M0),  the entries W(:,i)
 *         replace the row LINDXAU(i) of U.  For i in [M0..M1), the
 *         entries W(:,i) are copied into the row W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA  is an array of dimension  M0 containing the
 *         local row indexes A into which rows of U are copied.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M0 that  contains
 *         the local  row indexes of  U that should be copied into A and
 *         replaced by the columns of W.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * a0, * u0;
/*
 * The strides of one column block are kept in size_t:  LDA * DEPTH and
 * LDU * DEPTH do not fit in an int for large leading dimensions.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP04N_LOG2_DEPTH,
                              incU = (size_t)(LDU) <<
                                     HPL_LASWP04N_LOG2_DEPTH;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
/*
 * nu columns are processed by blocks of HPL_LASWP04N_DEPTH columns, the
 * nr = N - nu remaining ones by the clean-up loops.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04N_LOG2_DEPTH )
               << HPL_LASWP04N_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu; j += HPL_LASWP04N_DEPTH,
        A += incA, U += incU, w += HPL_LASWP04N_DEPTH )
   {
/*
 * Save the row of U into A before overwriting it with the column of W
 */
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]);
         w0 = w + (size_t)(i) * (size_t)(LDW);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP04N_DEPTH; k++, a0 += LDA, u0 += LDU )
         { *a0 = *u0; *u0 = w0[k]; }
      }
/*
 * Remaining columns of W only overwrite rows of U
 */
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) ));
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( k = 0; k < HPL_LASWP04N_DEPTH; k++, u0 += LDU )
         { *u0 = w0[k]; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU )
         { *a0 = *u0; *u0 = w0[j]; }
      }
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) ));
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, u0 += LDU ) { *u0 = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp04N
 */
}
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP04T_DEPTH
#define    HPL_LASWP04T_DEPTH       32
#define    HPL_LASWP04T_LOG2_DEPTH   5
#endif
/*
 * The blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP04T_DEPTH != ( 1 << HPL_LASWP04T_LOG2_DEPTH ) )
#error "HPL_LASWP04T_DEPTH must be equal to 2^HPL_LASWP04T_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp04T
(
   const int                        M0,
   const int                        M1,
   const int                        N,
   double *                         U,
   const int                        LDU,
   double *                         A,
   const int                        LDA,
   const double *                   W0,
   const double *                   W,
   const int                        LDW,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp04T
( M0, M1, N, U, LDU, A, LDA, W0, W, LDW, LINDXA, LINDXAU )
   const int                        M0;
   const int                        M1;
   const int                        N;
   double *                         U;
   const int                        LDU;
   double *                         A;
   const int                        LDA;
   const double *                   W0;
   const double *                   W;
   const int                        LDW;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those
 * columns of U with columns of W. In addition M1 - M0 columns of W  are
 * copied into U.
 *
 * Arguments
 * =========
 *
 * M0      (local input)                 const int
 *         On entry, M0 specifies the number of columns of U that should
 *         be copied into A and replaced by columns of W.  M0 must be at
 *         least zero.
 *
 * M1      (local input)                 const int
 *         On entry, M1 is the total number of columns of W used by this
 *         routine:  columns [0..M0)  replace the columns of  U  moved
 *         into A, columns [M0..M1) are copied into U.  M1 must be at
 *         least zero.
 *
 * N       (local input)                 const int
 *         On entry,  N  specifies the length of the columns of  U  that
 *         will be copied into rows of A. N must be at least zero.
 *
 * U       (local input/output)          double *
 *         On entry,  U  points  to an array of dimension (LDU,*).  This
 *         array contains the columns that are to be copied into rows of
 *         A.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,N).
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the  rows of this array specified by  LINDXA  are replaced by
 *         columns of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M0).
 *
 * W0      (local input)                 const double *
 *         On entry,  W0  is an array of size (M1-1)*LDW+1 that contains
 *         the destination offset  in U where the columns of W should be
 *         copied.  Only the entries W0(i*LDW) for i in [M0..M1) are
 *         read.
 *
 * W       (local input)                 const double *
 *         On entry,  W  is an array of size (LDW,M1), that contains data
 *         to be copied into U.  For i in [0..M0),  the entries W(:,i)
 *         replace the column LINDXAU(i) of U.  For i in [M0..M1), the
 *         entries W(:,i) are copied into the column W0(i*LDW) of U.
 *
 * LDW     (local input)                 const int
 *         On entry, LDW specifies the leading dimension of the array W.
 *         LDW must be at least MAX(1,N+1).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA  is an array of dimension  M0 containing the
 *         local row indexes A into which columns of U are copied.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension M0 that  contains
 *         the  local column indexes of  U  that should be copied into A
 *         and replaced by the columns of W.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * w = W, * w0;
   double                     * a0, * u0;
/*
 * The stride of one block of columns of A is kept in size_t: LDA * DEPTH
 * does not fit in an int for large leading dimensions.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP04T_LOG2_DEPTH;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( ( M0 <= 0 ) && ( M1 <= 0 ) ) || ( N <= 0 ) ) return;
/*
 * nu entries are processed by chunks of HPL_LASWP04T_DEPTH entries, the
 * nr = N - nu remaining ones by the clean-up loops.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP04T_LOG2_DEPTH )
               << HPL_LASWP04T_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu; j += HPL_LASWP04T_DEPTH, A += incA,
        U += HPL_LASWP04T_DEPTH, w += HPL_LASWP04T_DEPTH )
   {
/*
 * Save the column of U into a row of A before overwriting it with the
 * column of W.  All offsets are computed in size_t so that the products
 * index * leading dimension cannot overflow an int.
 */
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP04T_DEPTH; k++, a0 += LDA )
         { *a0 = u0[k]; u0[k] = w0[k]; }
      }
/*
 * Remaining columns of W only overwrite columns of U
 */
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )) *
                  (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( k = 0; k < HPL_LASWP04T_DEPTH; k++ ) { u0[k] = w0[k]; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M0; i++ )
      {
         a0 = A + (size_t)(LINDXA[i]);
         u0 = U + (size_t)(LINDXAU[i]) * (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++, a0 += LDA )
         { *a0 = u0[j]; u0[j] = w0[j]; }
      }
      for( i = M0; i < M1; i++ )
      {
         u0 = U + (size_t)(*( W0 + (size_t)(i) * (size_t)(LDW) )) *
                  (size_t)(LDU);
         w0 = w + (size_t)(i) * (size_t)(LDW);
         for( j = 0; j < nr; j++ ) { u0[j] = w0[j]; }
      }
   }
/*
 * End of HPL_dlaswp04T
 */
}
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Define default value for unrolling factor
 */
#ifndef HPL_LASWP05N_DEPTH
#define    HPL_LASWP05N_DEPTH       32
#define    HPL_LASWP05N_LOG2_DEPTH   5
#endif
/*
 * The column blocking below is only correct when both macros agree.
 */
#if ( HPL_LASWP05N_DEPTH != ( 1 << HPL_LASWP05N_LOG2_DEPTH ) )
#error "HPL_LASWP05N_DEPTH must be equal to 2^HPL_LASWP05N_LOG2_DEPTH"
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlaswp05N
(
   const int                        M,
   const int                        N,
   double *                         A,
   const int                        LDA,
   const double *                   U,
   const int                        LDU,
   const int *                      LINDXA,
   const int *                      LINDXAU
)
#else
void HPL_dlaswp05N
( M, N, A, LDA, U, LDU, LINDXA, LINDXAU )
   const int                        M;
   const int                        N;
   double *                         A;
   const int                        LDA;
   const double *                   U;
   const int                        LDU;
   const int *                      LINDXA;
   const int *                      LINDXAU;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlaswp05N copies rows of  U of global offset LINDXAU into rows of
 * A at positions indicated by LINDXA.
 *
 * Arguments
 * =========
 *
 * M       (local input)                 const int
 *         On entry, M  specifies the number of rows of U that should be
 *         copied into A. M must be at least zero.
 *
 * N       (local input)                 const int
 *         On entry, N specifies the length of the rows of U that should
 *         be copied into A. N must be at least zero.
 *
 * A       (local output)                double *
 *         On entry, A points to an array of dimension (LDA,N). On exit,
 *         the  rows of this array specified by  LINDXA  are replaced by
 *         rows of U indicated by LINDXAU.
 *
 * LDA     (local input)                 const int
 *         On entry, LDA specifies the leading dimension of the array A.
 *         LDA must be at least MAX(1,M).
 *
 * U       (local input)                 const double *
 *         On entry,  U  points to an array of dimension (LDU,N).   This
 *         array contains the rows that are to be copied into A.  It is
 *         not modified.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the leading dimension of the array U.
 *         LDU must be at least MAX(1,M).
 *
 * LINDXA  (local input)                 const int *
 *         On entry, LINDXA is an array of dimension M that contains the
 *         local row indexes of A that should be copied from U.
 *
 * LINDXAU (local input)                 const int *
 *         On entry, LINDXAU  is an array of dimension  M that  contains
 *         the local row indexes of U that should be copied in A.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const double               * U0 = U, * u0;
   double                     * a0;
/*
 * The strides of one column block are kept in size_t:  LDA * DEPTH and
 * LDU * DEPTH do not fit in an int for large leading dimensions.
 */
   const size_t               incA = (size_t)(LDA) <<
                                     HPL_LASWP05N_LOG2_DEPTH,
                              incU = (size_t)(LDU) <<
                                     HPL_LASWP05N_LOG2_DEPTH;
   int                        i, j, k, nr, nu;
/* ..
 * .. Executable Statements ..
 */
   if( ( M <= 0 ) || ( N <= 0 ) ) return;
/*
 * nu columns are processed by blocks of HPL_LASWP05N_DEPTH columns, the
 * nr = N - nu remaining ones by the clean-up loop.
 */
   nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05N_LOG2_DEPTH )
               << HPL_LASWP05N_LOG2_DEPTH );
   nr = N - nu;

   for( j = 0; j < nu; j += HPL_LASWP05N_DEPTH, A += incA, U0 += incU )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]);
/*
 * Constant trip count: unrolling is left to the compiler, and any
 * power-of-two depth is handled correctly.
 */
         for( k = 0; k < HPL_LASWP05N_DEPTH; k++, a0 += LDA, u0 += LDU )
         { *a0 = *u0; }
      }
   }

   if( nr > 0 )
   {
      for( i = 0; i < M; i++ )
      {
         a0 = A  + (size_t)(LINDXA[ i]);
         u0 = U0 + (size_t)(LINDXAU[i]);
         for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { *a0 = *u0; }
      }
   }
/*
 * End of HPL_dlaswp05N
 */
}
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP05T_DEPTH #define HPL_LASWP05T_DEPTH 32 #define HPL_LASWP05T_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp05T ( const int M, const int N, double * A, const int LDA, const double * U, const int LDU, const int * LINDXA, const int * LINDXAU ) #else void HPL_dlaswp05T ( M, N, A, LDA, U, LDU, LINDXA, LINDXAU ) const int M; const int N; double * A; const int LDA; const double * U; const int LDU; const int * LINDXA; const int * LINDXAU; #endif { /* * Purpose * ======= * * HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows * of A at positions indicated by LINDXA. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of columns of U that shouldbe copied into A. M must be at least zero. * * N (local input) const int * On entry, N specifies the length of the columns of U that will * be copied into rows of A. N must be at least zero. * * A (local output) double * * On entry, A points to an array of dimension (LDA,N). On exit, * the rows of this array specified by LINDXA are replaced by * columns of U indicated by LINDXAU. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). 
* * U (local input/output) const double * * On entry, U points to an array of dimension (LDU,*). This * array contains the columns that are to be copied into rows of * A. * * LDU (local input) const int * On entry, LDU specifies the leading dimension of the array U. * LDU must be at least MAX(1,N). * * LINDXA (local input) const int * * On entry, LINDXA is an array of dimension M that contains the * local row indexes of A that should be copied from U. * * LINDXAU (local input) const int * * On entry, LINDXAU is an array of dimension M that contains * the local column indexes of U that should be copied in A. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ const double * U0 = U, * u0; double * a0; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP05T_LOG2_DEPTH ), incU = ( 1 << HPL_LASWP05T_LOG2_DEPTH ); int nr, nu; register int i, j; /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP05T_LOG2_DEPTH ) << HPL_LASWP05T_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP05T_DEPTH, A += incA, U0 += incU ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[ i]); u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); *a0 = u0[ 0]; a0 += LDA; #if ( HPL_LASWP05T_DEPTH > 1 ) *a0 = u0[ 1]; a0 += LDA; #endif #if ( HPL_LASWP05T_DEPTH > 2 ) *a0 = u0[ 2]; a0 += LDA; *a0 = u0[ 3]; a0 += LDA; #endif #if ( HPL_LASWP05T_DEPTH > 4 ) *a0 = u0[ 4]; a0 += LDA; *a0 = u0[ 5]; a0 += LDA; *a0 = u0[ 6]; a0 += LDA; *a0 = u0[ 7]; a0 += LDA; #endif #if ( HPL_LASWP05T_DEPTH > 8 ) *a0 = u0[ 8]; a0 += LDA; *a0 = u0[ 9]; a0 += LDA; *a0 = u0[10]; a0 += LDA; *a0 = u0[11]; a0 += LDA; *a0 = u0[12]; a0 += LDA; *a0 = u0[13]; a0 += LDA; *a0 = u0[14]; a0 += LDA; *a0 = u0[15]; a0 += LDA; #endif #if ( HPL_LASWP05T_DEPTH > 16 ) *a0 = u0[16]; a0 += LDA; *a0 = u0[17]; a0 += LDA; *a0 = u0[18]; a0 += LDA; *a0 = u0[19]; a0 += LDA; *a0 = u0[20]; a0 += LDA; *a0 = u0[21]; a0 += 
LDA; *a0 = u0[22]; a0 += LDA; *a0 = u0[23]; a0 += LDA; *a0 = u0[24]; a0 += LDA; *a0 = u0[25]; a0 += LDA; *a0 = u0[26]; a0 += LDA; *a0 = u0[27]; a0 += LDA; *a0 = u0[28]; a0 += LDA; *a0 = u0[29]; a0 += LDA; *a0 = u0[30]; a0 += LDA; *a0 = u0[31]; a0 += LDA; #endif } } if( nr > 0 ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[ i]); u0 = U0 + (size_t)(LINDXAU[i]) * (size_t)(LDU); for( j = 0; j < nr; j++, a0 += LDA ) { *a0 = u0[j]; } } } /* * End of HPL_dlaswp05T */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp06N.c0000644000000000000000000002134311256503657014637 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP06N_DEPTH #define HPL_LASWP06N_DEPTH 32 #define HPL_LASWP06N_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp06N ( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA ) #else void HPL_dlaswp06N ( M, N, A, LDA, U, LDU, LINDXA ) const int M; const int N; double * A; const int LDA; double * U; const int LDU; const int * LINDXA; #endif { /* * Purpose * ======= * * HPL_dlaswp06N swaps rows of U with rows of A at positions * indicated by LINDXA. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of A that should be * swapped with rows of U. M must be at least zero. * * N (local input) const int * On entry, N specifies the length of the rows of A that should * be swapped with rows of U. N must be at least zero. * * A (local output) double * * On entry, A points to an array of dimension (LDA,N). On exit, * the rows of this array specified by LINDXA are replaced by * rows or columns of U. 
* * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * U (local input/output) double * * On entry, U points to an array of dimension (LDU,N). This * array contains the rows of U that are to be swapped with rows * of A. * * LDU (local input) const int * On entry, LDU specifies the leading dimension of the array U. * LDU must be at least MAX(1,M). * * LINDXA (local input) const int * * On entry, LINDXA is an array of dimension M that contains the * local row indexes of A that should be swapped with U. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double r; double * U0 = U, * a0, * u0; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP06N_LOG2_DEPTH ), incU = (int)( (unsigned int)(LDU) << HPL_LASWP06N_LOG2_DEPTH ); int nr, nu; register int i, j; /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06N_LOG2_DEPTH ) << HPL_LASWP06N_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP06N_DEPTH, A += incA, U0 += incU ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #if ( HPL_LASWP06N_DEPTH > 1 ) r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #endif #if ( HPL_LASWP06N_DEPTH > 2 ) r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #endif #if ( HPL_LASWP06N_DEPTH > 4 ) r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #endif #if ( HPL_LASWP06N_DEPTH > 8 ) r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; 
*a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #endif #if ( HPL_LASWP06N_DEPTH > 16 ) r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; r = *a0; *a0 = *u0; *u0 = r; a0 += LDA; u0 += LDU; #endif } } if( nr ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i); for( j = 0; j < nr; j++, a0 += LDA, u0 += LDU ) { r = *a0; *a0 = *u0; *u0 = r; } } } /* * End of HPL_dlaswp06N */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp06T.c0000644000000000000000000002107611256503657014650 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP06T_DEPTH #define HPL_LASWP06T_DEPTH 32 #define HPL_LASWP06T_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp06T ( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA ) #else void HPL_dlaswp06T ( M, N, A, LDA, U, LDU, LINDXA ) const int M; const int N; double * A; const int LDA; double * U; const int LDU; const int * LINDXA; #endif { /* * Purpose * ======= * * HPL_dlaswp06T swaps columns of U with rows of A at positions * indicated by LINDXA. * * Arguments * ========= * * M (local input) const int * On entry, M specifies the number of rows of A that should be * swapped with columns of U. M must be at least zero. * * N (local input) const int * On entry, N specifies the length of the rows of A that should * be swapped with columns of U. N must be at least zero. * * A (local output) double * * On entry, A points to an array of dimension (LDA,N). On exit, * the rows of this array specified by LINDXA are replaced by * columns of U. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * U (local input/output) double * * On entry, U points to an array of dimension (LDU,*). This * array contains the columns of U that are to be swapped with * rows of A. * * LDU (local input) const int * On entry, LDU specifies the leading dimension of the array U. * LDU must be at least MAX(1,N). * * LINDXA (local input) const int * * On entry, LINDXA is an array of dimension M that contains the * local row indexes of A that should be swapped with U. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ double r; double * U0 = U, * a0, * u0; const int incA = (int)( (unsigned int)(LDA) << HPL_LASWP06T_LOG2_DEPTH ), incU = ( 1 << HPL_LASWP06T_LOG2_DEPTH ); int nr, nu; register int i, j; /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; nr = N - ( nu = (int)( ( (unsigned int)(N) >> HPL_LASWP06T_LOG2_DEPTH ) << HPL_LASWP06T_LOG2_DEPTH ) ); for( j = 0; j < nu; j += HPL_LASWP06T_DEPTH, A += incA, U0 += incU ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i) * (size_t)(LDU); r = *a0; *a0 = u0[ 0]; u0[ 0] = r; a0 += LDA; #if ( HPL_LASWP06T_DEPTH > 1 ) r = *a0; *a0 = u0[ 1]; u0[ 1] = r; a0 += LDA; #endif #if ( HPL_LASWP06T_DEPTH > 2 ) r = *a0; *a0 = u0[ 2]; u0[ 2] = r; a0 += LDA; r = *a0; *a0 = u0[ 3]; u0[ 3] = r; a0 += LDA; #endif #if ( HPL_LASWP06T_DEPTH > 4 ) r = *a0; *a0 = u0[ 4]; u0[ 4] = r; a0 += LDA; r = *a0; *a0 = u0[ 5]; u0[ 5] = r; a0 += LDA; r = *a0; *a0 = u0[ 6]; u0[ 6] = r; a0 += LDA; r = *a0; *a0 = u0[ 7]; u0[ 7] = r; a0 += LDA; #endif #if ( HPL_LASWP06T_DEPTH > 8 ) r = *a0; *a0 = u0[ 8]; u0[ 8] = r; a0 += LDA; r = *a0; *a0 = u0[ 9]; u0[ 9] = r; a0 += LDA; r = *a0; *a0 = u0[10]; u0[10] = r; a0 += LDA; r = *a0; *a0 = u0[11]; u0[11] = r; a0 += LDA; r = *a0; *a0 = u0[12]; u0[12] = r; a0 += LDA; r = *a0; *a0 = u0[13]; u0[13] = r; a0 += LDA; r = *a0; *a0 = u0[14]; u0[14] = r; a0 += LDA; r = *a0; *a0 = u0[15]; u0[15] = r; a0 += LDA; #endif #if ( HPL_LASWP06T_DEPTH > 16 ) r = *a0; *a0 = u0[16]; u0[16] = r; a0 += LDA; r = *a0; *a0 = u0[17]; u0[17] = r; a0 += LDA; r = *a0; *a0 = u0[18]; u0[18] = r; a0 += LDA; r = *a0; *a0 = u0[19]; u0[19] = r; a0 += LDA; r = *a0; *a0 = u0[20]; u0[20] = r; a0 += LDA; r = *a0; *a0 = u0[21]; u0[21] = r; a0 += LDA; r = *a0; *a0 = u0[22]; u0[22] = r; a0 += LDA; r = *a0; *a0 = u0[23]; u0[23] = r; a0 += LDA; r = *a0; *a0 = u0[24]; u0[24] = r; a0 += LDA; r = *a0; *a0 = u0[25]; u0[25] = r; a0 += LDA; r = *a0; *a0 = u0[26]; u0[26] = r; a0 += LDA; r = *a0; *a0 = u0[27]; u0[27] = r; 
a0 += LDA; r = *a0; *a0 = u0[28]; u0[28] = r; a0 += LDA; r = *a0; *a0 = u0[29]; u0[29] = r; a0 += LDA; r = *a0; *a0 = u0[30]; u0[30] = r; a0 += LDA; r = *a0; *a0 = u0[31]; u0[31] = r; a0 += LDA; #endif } } if( nr > 0 ) { for( i = 0; i < M; i++ ) { a0 = A + (size_t)(LINDXA[i]); u0 = U0 + (size_t)(i) * (size_t)(LDU); for( j = 0; j < nr; j++, a0 += LDA ) { r = *a0; *a0 = u0[j]; u0[j] = r; } } } /* * End of HPL_dlaswp06T */ } hpcc-1.4.1/hpl/src/pauxil/HPL_dlaswp10N.c0000644000000000000000000001711011256503657014627 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Define default value for unrolling factor */ #ifndef HPL_LASWP10N_DEPTH #define HPL_LASWP10N_DEPTH 32 #define HPL_LASWP10N_LOG2_DEPTH 5 #endif #ifdef HPL_STDC_HEADERS void HPL_dlaswp10N ( const int M, const int N, double * A, const int LDA, const int * IPIV ) #else void HPL_dlaswp10N ( M, N, A, LDA, IPIV ) const int M; const int N; double * A; const int LDA; const int * IPIV; #endif { /* * Purpose * ======= * * HPL_dlaswp10N performs a sequence of local column interchanges on a * matrix A. One column interchange is initiated for columns 0 through * N-1 of A. * * Arguments * ========= * * M (local input) const int * __arg0__ * * N (local input) const int * On entry, M specifies the number of rows of the array A. M * must be at least zero. * * A (local input/output) double * * On entry, N specifies the number of columns of the array A. N * must be at least zero. * * LDA (local input) const int * On entry, A points to an array of dimension (LDA,N). This * array contains the columns onto which the interchanges should * be applied. On exit, A contains the permuted matrix. 
* * IPIV (local input) const int * * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least MAX(1,M). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double r; double * a0, * a1; const int incA = ( 1 << HPL_LASWP10N_LOG2_DEPTH ); int jp, mr, mu; register int i, j; /* .. * .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; mr = M - ( mu = (int)( ( (unsigned int)(M) >> HPL_LASWP10N_LOG2_DEPTH ) << HPL_LASWP10N_LOG2_DEPTH ) ); for( j = 0; j < N; j++ ) { if( j != ( jp = IPIV[j] ) ) { a0 = A + j * LDA; a1 = A + jp * LDA; for( i = 0; i < mu; i += incA, a0 += incA, a1 += incA ) { r = *a0; *a0 = *a1; *a1 = r; #if ( HPL_LASWP10N_DEPTH > 1 ) r = a0[ 1]; a0[ 1] = a1[ 1]; a1[ 1] = r; #endif #if ( HPL_LASWP10N_DEPTH > 2 ) r = a0[ 2]; a0[ 2] = a1[ 2]; a1[ 2] = r; r = a0[ 3]; a0[ 3] = a1[ 3]; a1[ 3] = r; #endif #if ( HPL_LASWP10N_DEPTH > 4 ) r = a0[ 4]; a0[ 4] = a1[ 4]; a1[ 4] = r; r = a0[ 5]; a0[ 5] = a1[ 5]; a1[ 5] = r; r = a0[ 6]; a0[ 6] = a1[ 6]; a1[ 6] = r; r = a0[ 7]; a0[ 7] = a1[ 7]; a1[ 7] = r; #endif #if ( HPL_LASWP10N_DEPTH > 8 ) r = a0[ 8]; a0[ 8] = a1[ 8]; a1[ 8] = r; r = a0[ 9]; a0[ 9] = a1[ 9]; a1[ 9] = r; r = a0[10]; a0[10] = a1[10]; a1[10] = r; r = a0[11]; a0[11] = a1[11]; a1[11] = r; r = a0[12]; a0[12] = a1[12]; a1[12] = r; r = a0[13]; a0[13] = a1[13]; a1[13] = r; r = a0[14]; a0[14] = a1[14]; a1[14] = r; r = a0[15]; a0[15] = a1[15]; a1[15] = r; #endif #if ( HPL_LASWP10N_DEPTH > 16 ) r = a0[16]; a0[16] = a1[16]; a1[16] = r; r = a0[17]; a0[17] = a1[17]; a1[17] = r; r = a0[18]; a0[18] = a1[18]; a1[18] = r; r = a0[19]; a0[19] = a1[19]; a1[19] = r; r = a0[20]; a0[20] = a1[20]; a1[20] = r; r = a0[21]; a0[21] = a1[21]; a1[21] = r; r = a0[22]; a0[22] = a1[22]; a1[22] = r; r = a0[23]; a0[23] = a1[23]; a1[23] = r; r = a0[24]; a0[24] = a1[24]; a1[24] = r; r = a0[25]; a0[25] = a1[25]; a1[25] = r; r = a0[26]; a0[26] = a1[26]; a1[26] = r; r = a0[27]; a0[27] = a1[27]; 
a1[27] = r; r = a0[28]; a0[28] = a1[28]; a1[28] = r; r = a0[29]; a0[29] = a1[29]; a1[29] = r; r = a0[30]; a0[30] = a1[30]; a1[30] = r; r = a0[31]; a0[31] = a1[31]; a1[31] = r; #endif } for( i = 0; i < mr; i++ ) { r = a0[i]; a0[i] = a1[i]; a1[i] = r; } } } /* * End of HPL_dlaswp10N */ } hpcc-1.4.1/hpl/src/pauxil/HPL_indxg2l.c0000644000000000000000000001554311256503657014435 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_indxg2l ( const int IG, const int INB, const int NB, const int SRCPROC, const int NPROCS ) #else int HPL_indxg2l ( IG, INB, NB, SRCPROC, NPROCS ) const int IG; const int INB; const int NB; const int SRCPROC; const int NPROCS; #endif { /* * Purpose * ======= * * HPL_indxg2l computes the local index of a matrix entry pointed to by * the global index IG. This local returned index is the same in all * processes. * * Arguments * ========= * * IG (input) const int * On entry, IG specifies the global index of the matrix entry. * IG must be at least zero. * * INB (input) const int * On entry, INB specifies the size of the first block of the * global matrix. INB must be at least one. * * NB (input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix. NB must be larger than one. * * SRCPROC (input) const int * On entry, if SRCPROC = -1, the data is not distributed but * replicated, in which case this routine returns IG in all * processes. Otherwise, the value of SRCPROC is ignored. * * NPROCS (input) const int * On entry, NPROCS specifies the total number of process rows * or columns over which the matrix is distributed. NPROCS must * be at least one. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int i, j; /* .. 
* .. Executable Statements .. */ if( ( IG < INB ) || ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * IG belongs to the first block, or the data is not distributed, or * there is just one process in this dimension of the grid. */ return( IG ); /* * IG = INB - NB + ( l * NPROCS + MYROC ) * NB + X with 0 <= X < NB, * thus IG is to be found in the block (IG-INB+NB) / NB = l*NPROCS+MYROC * with 0 <= MYROC < NPROCS. The local index to be returned depends on * whether IG resides in the process owning the first partial block of * size INB (MYROC=0). To determine this cheaply, let i = (IG-INB) / NB, * so that if NPROCS divides i+1, i.e. MYROC=0, we have i+1 = l*NPROCS. * If we set j = i / NPROCS, it follows that j = l-1. Therefore, i+1 is * equal to (j+1) * NPROCS. Conversely, if NPROCS does not divide i+1, * then i+1 = l*NPROCS + MYROC with 1 <= MYROC < NPROCS. It follows that * j=l and thus (j+1)*NPROCS > i+1. */ j = ( i = ( IG - INB ) / NB ) / NPROCS; /* * When IG resides in the process owning the first partial block of size * INB (MYROC = 0), then the result IL can be written as: * IL = INB - NB + l * NB + X = IG + ( l - (l * NPROCS + MYROC) ) * NB. * Using the above notation, we have i+1 = l*NPROCS + MYROC = l*NPROCS, * i.e l = ( i+1 ) / NPROCS = j+1, since NPROCS divides i+1, therefore * IL = IG + ( j + 1 - ( i + 1 ) ) * NB. * * Otherwise when MYROC >= 1, the result IL can be written as: * IL = l * NB + X = IG - INB + ( ( l+1 ) - ( l * NPROCS + MYROC ) )*NB. * We still have i+1 = l*NPROCS+MYROC. Since NPROCS does not divide i+1, * we have j = (l*NPROCS+MYROC-1) / NPROCS = l, i.e * IL = IG - INB + ( j + 1 - ( i + 1 ) ) * NB. */ return( NB * (j - i) + ( ( i + 1 - ( j + 1 )*NPROCS ) ? IG - INB : IG ) ); /* * End of HPL_indxg2l */ } hpcc-1.4.1/hpl/src/pauxil/HPL_indxg2lp.c0000644000000000000000000001733711256503657014620 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#ifdef HPL_STDC_HEADERS
void HPL_indxg2lp
(
   int *                            IL,
   int *                            PROC,
   const int                        IG,
   const int                        INB,
   const int                        NB,
   const int                        SRCPROC,
   const int                        NPROCS
)
#else
void HPL_indxg2lp
( IL, PROC, IG, INB, NB, SRCPROC, NPROCS )
   int *                            IL;
   int *                            PROC;
   const int                        IG;
   const int                        INB;
   const int                        NB;
   const int                        SRCPROC;
   const int                        NPROCS;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_indxg2lp computes, for the matrix entry of global index IG, both
 * its local index and the coordinate of the process that possesses it.
 * The routine takes no calling-process coordinate, so both results are
 * the same in all processes.
 *
 * Arguments
 * =========
 *
 * IL      (output)                      int *
 *         On exit, the local index corresponding to IG.
 *
 * PROC    (output)                      int *
 *         On exit, the coordinate of the process owning the entry IG.
 *         When IG lies in the first block, when SRCPROC = -1 or when
 *         NPROCS = 1, PROC is simply set to SRCPROC (hence -1 for data
 *         that is not distributed).
 *
 * IG      (input)                       const int
 *         Global index of the matrix entry.
 *
 * INB     (input)                       const int
 *         Size of the first block of the global matrix.
 *
 * NB      (input)                       const int
 *         Blocking factor used for every block after the first one.
 *         NB is used as a divisor and must be positive.
 *
 * SRCPROC (input)                       const int
 *         Coordinate of the process owning the first block; the owner
 *         of IG is counted cyclically from it. SRCPROC = -1 means the
 *         data is not distributed, in which case IL = IG.
 *
 * NPROCS  (input)                       const int
 *         Number of process rows or columns over which the matrix is
 *         distributed.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        blk, cyc, owner;
/* ..
 * .. Executable Statements ..
 */
   if( ( IG >= INB ) && ( SRCPROC != -1 ) && ( NPROCS != 1 ) )
   {
/*
 * IG lies in global block blk+1 (block 0 being the first one, of size
 * INB); cyc counts the complete rounds over the NPROCS processes made
 * by the blk blocks that follow the first one.
 */
      blk = ( IG - INB ) / NB;
      cyc = blk / NPROCS;
/*
 * Block-cyclic distribution: the owner is blk+1 processes after SRCPROC,
 * reduced with the MPosMod macro.
 */
      owner = SRCPROC + 1 + blk;
      *PROC = MPosMod( owner, NPROCS );
/*
 * When NPROCS divides blk+1 the owner also holds the first block of
 * size INB, so the local index is counted from IG; otherwise it holds
 * only blocks of size NB and the count starts from IG - INB. In both
 * cases the blk - cyc blocks stored in other processes are removed.
 */
      if( blk + 1 == ( cyc + 1 ) * NPROCS )
         *IL = NB * ( cyc - blk ) + IG;
      else
         *IL = NB * ( cyc - blk ) + ( IG - INB );
   }
   else
   {
/*
 * IG belongs to the first block, or the data is not distributed, or
 * there is just one process in this dimension of the grid.
 */
      *IL   = IG;
      *PROC = SRCPROC;
   }
/*
 * End of HPL_indxg2lp
 */
}
#ifdef HPL_STDC_HEADERS
int HPL_indxg2p
(
   const int                        IG,
   const int                        INB,
   const int                        NB,
   const int                        SRCPROC,
   const int                        NPROCS
)
#else
int HPL_indxg2p
( IG, INB, NB, SRCPROC, NPROCS )
   const int                        IG;
   const int                        INB;
   const int                        NB;
   const int                        SRCPROC;
   const int                        NPROCS;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_indxg2p returns the coordinate of the process that possesses the
 * matrix entry of global index IG.
 *
 * Arguments
 * =========
 *
 * IG      (input)                       const int
 *         Global index of the matrix entry.
 *
 * INB     (input)                       const int
 *         Size of the first block of the global matrix.
 *
 * NB      (input)                       const int
 *         Blocking factor used for every block after the first one.
 *         NB is used as a divisor and must be positive.
 *
 * SRCPROC (input)                       const int
 *         Coordinate of the process that possesses the first block of
 *         the matrix. It is returned as is when IG lies in that first
 *         block, when SRCPROC = -1 (data not distributed) or when
 *         NPROCS = 1.
 *
 * NPROCS  (input)                       const int
 *         Number of process rows or columns over which the matrix is
 *         distributed.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        owner = SRCPROC;
/* ..
 * .. Executable Statements ..
 */
   if( ( IG >= INB ) && ( SRCPROC != -1 ) && ( NPROCS != 1 ) )
   {
/*
 * IG is in block 1 + ( IG - INB ) / NB. Step that many processes past
 * SRCPROC and reduce with the MPosMod macro (block-cyclic distribution).
 */
      owner += 1 + ( IG - INB ) / NB;
      owner  = MPosMod( owner, NPROCS );
   }

   return( owner );
/*
 * End of HPL_indxg2p
 */
}
#ifdef HPL_STDC_HEADERS
int HPL_indxl2g
(
   const int                        IL,
   const int                        INB,
   const int                        NB,
   const int                        PROC,
   const int                        SRCPROC,
   const int                        NPROCS
)
#else
int HPL_indxl2g
( IL, INB, NB, PROC, SRCPROC, NPROCS )
   const int                        IL;
   const int                        INB;
   const int                        NB;
   const int                        PROC;
   const int                        SRCPROC;
   const int                        NPROCS;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_indxl2g returns the global index of the matrix entry stored at
 * local index IL in the process of coordinate PROC.
 *
 * Arguments
 * =========
 *
 * IL      (input)                       const int
 *         Local index of the matrix entry.
 *
 * INB     (input)                       const int
 *         Size of the first block of the global matrix.
 *
 * NB      (input)                       const int
 *         Blocking factor used for every block after the first one.
 *         NB is used as a divisor and must be positive.
 *
 * PROC    (input)                       const int
 *         Coordinate of the process whose local index IL is converted.
 *
 * SRCPROC (input)                       const int
 *         Coordinate of the process that possesses the first block of
 *         the matrix. SRCPROC = -1 means the data is not distributed,
 *         in which case IL is returned unchanged.
 *
 * NPROCS  (input)                       const int
 *         Number of process rows or columns over which the matrix is
 *         distributed. When NPROCS = 1, IL is returned unchanged.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        ahead;
/* ..
 * .. Executable Statements ..
 */
/*
 * The data is not distributed, or there is just one process in this
 * dimension of the grid: local and global indexes coincide.
 */
   if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) return( IL );

   if( PROC == SRCPROC )
   {
/*
 * PROC holds the first block, of size INB. Inside that block the local
 * and global indexes are equal.
 */
      if( IL < INB ) return( IL );
/*
 * Otherwise ( IL - INB ) / NB + 1 local blocks precede the block that
 * contains IL, and each of the other NPROCS-1 processes holds as many
 * blocks of size NB that come globally before the entry.
 */
      return( IL + ( NPROCS - 1 ) * NB * ( ( IL - INB ) / NB + 1 ) );
   }
/*
 * PROC is not the source process. ahead is the number of processes that
 * lie strictly between SRCPROC and PROC when walking the process line
 * from left to right with wrap around; each of them holds one more NB
 * block before the entry than the IL / NB blocks held by the others.
 */
   ahead = PROC - SRCPROC - 1;
   if( PROC < SRCPROC ) ahead += NPROCS;

   return( NB * ( ( NPROCS - 1 ) * ( IL / NB ) + ahead ) + IL + INB );
/*
 * End of HPL_indxl2g
 */
}
/*
 * HPL_infog2l_1d handles a single dimension (rows or columns) on behalf
 * of HPL_infog2l: for the global index G it stores in OWNER the
 * coordinate of the process owning G and in LOC the local index at
 * which the part of the matrix starting at G begins in process ME.
 *
 * IB is the size of the first block, B the blocking factor, SRC the
 * coordinate of the process owning the first block (-1 when the data is
 * not distributed) and NP the number of processes in this dimension.
 */
#ifdef HPL_STDC_HEADERS
static void HPL_infog2l_1d
(
   const int                        G,
   const int                        IB,
   const int                        B,
   const int                        SRC,
   const int                        ME,
   const int                        NP,
   int *                            LOC,
   int *                            OWNER
)
#else
static void HPL_infog2l_1d
( G, IB, B, SRC, ME, NP, LOC, OWNER )
   const int                        G;
   const int                        IB;
   const int                        B;
   const int                        SRC;
   const int                        ME;
   const int                        NP;
   int *                            LOC;
   int *                            OWNER;
#endif
{
/*
 * .. Local Variables ..
 */
   int                        ilocblk, loc, mydist, nblocks, off,
                              owner = SRC;
/* ..
 * .. Executable Statements ..
 */
   if( ( SRC == -1 ) || ( NP == 1 ) )
   {
/*
 * The data is not distributed, or there is just one process in this
 * dimension of the grid.
 */
      loc = G;
   }
   else if( G < IB )
   {
/*
 * G refers to an entry in the first block, which SRC owns.
 */
      loc = ( ( ME == SRC ) ? G : 0 );
   }
   else
   {
/*
 * G is not in the first block: nblocks is the global number of the
 * block G belongs to, and the owner is found nblocks processes after
 * SRC, modulo NP.
 */
      nblocks = ( G - IB ) / B + 1;
      owner   = SRC + nblocks;
      owner  -= ( owner / NP ) * NP;

      if( ME == SRC )
      {
/*
 * I am the source process (mydist = 0); my first block has size IB.
 * ilocblk is the minimum number of blocks of size B that every process
 * holds ahead of G.
 */
         if( nblocks < NP )
         {
            loc = IB;
         }
         else
         {
            ilocblk = nblocks / NP;

            if( ilocblk * NP < nblocks )
            {
/*
 * The source process holds IB plus ilocblk full blocks ahead of G.
 */
               loc = IB + ilocblk * B;
            }
            else if( ME == owner )
            {
/*
 * NP divides nblocks and I own G: G = IB + (nblocks-1)*B + X while
 * LOC = IB + (ilocblk-1)*B + X, i.e. LOC = G + (ilocblk-nblocks)*B.
 */
               loc = G + ( ilocblk - nblocks ) * B;
            }
            else
            {
               loc = IB + ( ilocblk - 1 ) * B;
            }
         }
      }
      else
      {
/*
 * I am not the source process, so all my blocks have size B. off is the
 * offset of G past the first block, and mydist my distance from the
 * source process (the source being at distance 0).
 */
         off    = G - IB;
         mydist = ME - SRC;
         if( mydist < 0 ) mydist += NP;

         if( nblocks < NP )
         {
            mydist -= nblocks;

            if( mydist < 0 )
               loc = B;
            else if( ME == owner )
               loc = off + ( 1 - nblocks ) * B;
            else
               loc = 0;
         }
         else
         {
            ilocblk = nblocks / NP;
            mydist -= nblocks - ilocblk * NP;
/*
 * mydist < 0: I hold ilocblk+1 full blocks ahead of G. Otherwise I hold
 * ilocblk full blocks, plus the leading part of the block containing G
 * when I am its owner.
 */
            if( mydist < 0 )
               loc = ( ilocblk + 1 ) * B;
            else if( ME == owner )
               loc = ( ilocblk - nblocks + 1 ) * B + off;
            else
               loc = ilocblk * B;
         }
      }
   }

   *OWNER = owner;
   *LOC   = loc;
}

#ifdef HPL_STDC_HEADERS
void HPL_infog2l
(
   int                              I,
   int                              J,
   const int                        IMB,
   const int                        MB,
   const int                        INB,
   const int                        NB,
   const int                        RSRC,
   const int                        CSRC,
   const int                        MYROW,
   const int                        MYCOL,
   const int                        NPROW,
   const int                        NPCOL,
   int *                            II,
   int *                            JJ,
   int *                            PROW,
   int *                            PCOL
)
#else
void HPL_infog2l
( I, J, IMB, MB, INB, NB, RSRC, CSRC, MYROW, MYCOL, NPROW, NPCOL, II, JJ, PROW, PCOL )
   int                              I;
   int                              J;
   const int                        IMB;
   const int                        MB;
   const int                        INB;
   const int                        NB;
   const int                        RSRC;
   const int                        CSRC;
   const int                        MYROW;
   const int                        MYCOL;
   const int                        NPROW;
   const int                        NPCOL;
   int *                            II;
   int *                            JJ;
   int *                            PROW;
   int *                            PCOL;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_infog2l computes the starting local indexes II, JJ corresponding
 * to the submatrix starting globally at the entry pointed to by I, J.
 * It also returns the coordinates in the grid of the process owning the
 * matrix entry of global indexes I, J, namely PROW and PCOL.
 *
 * Rows and columns are treated independently and identically; each
 * dimension is handled by one call to HPL_infog2l_1d.
 *
 * Arguments
 * =========
 *
 * I       (global input)                int
 *         Global row index of the matrix entry.
 *
 * J       (global input)                int
 *         Global column index of the matrix entry.
 *
 * IMB     (global input)                const int
 *         Size of the first row block of the global matrix.
 *
 * MB      (global input)                const int
 *         Blocking factor used to partition and distribute the rows.
 *         MB is used as a divisor and must be positive.
 *
 * INB     (global input)                const int
 *         Size of the first column block of the global matrix.
 *
 * NB      (global input)                const int
 *         Blocking factor used to partition and distribute the columns.
 *         NB is used as a divisor and must be positive.
 *
 * RSRC    (global input)                const int
 *         Row coordinate of the process that possesses the first row
 *         block of the matrix, or -1 when the rows are not distributed.
 *
 * CSRC    (global input)                const int
 *         Column coordinate of the process that possesses the first
 *         column block of the matrix, or -1 when the columns are not
 *         distributed.
 *
 * MYROW   (local input)                 const int
 *         Row coordinate of the calling process in the grid.
 *
 * MYCOL   (local input)                 const int
 *         Column coordinate of the calling process in the grid.
 *
 * NPROW   (global input)                const int
 *         Number of process rows in the grid.
 *
 * NPCOL   (global input)                const int
 *         Number of process columns in the grid.
 *
 * II      (local output)                int *
 *         On exit, the local starting row index of the submatrix.
 *
 * JJ      (local output)                int *
 *         On exit, the local starting column index of the submatrix.
 *
 * PROW    (global output)               int *
 *         On exit, the row coordinate of the process owning the entry
 *         of global row index I (RSRC itself when RSRC = -1 or
 *         NPROW = 1).
 *
 * PCOL    (global output)               int *
 *         On exit, the column coordinate of the process owning the
 *         entry of global column index J (CSRC itself when CSRC = -1 or
 *         NPCOL = 1).
 *
 * ---------------------------------------------------------------------
 */
/* ..
 * .. Executable Statements ..
 */
   HPL_infog2l_1d( I, IMB, MB, RSRC, MYROW, NPROW, II, PROW );
/*
 * Idem for the columns
 */
   HPL_infog2l_1d( J, INB, NB, CSRC, MYCOL, NPCOL, JJ, PCOL );
/*
 * End of HPL_infog2l
 */
}
#ifdef HPL_STDC_HEADERS
int HPL_numroc
(
   const int                        N,
   const int                        INB,
   const int                        NB,
   const int                        PROC,
   const int                        SRCPROC,
   const int                        NPROCS
)
#else
int HPL_numroc
( N, INB, NB, PROC, SRCPROC, NPROCS )
   const int                        N;
   const int                        INB;
   const int                        NB;
   const int                        PROC;
   const int                        SRCPROC;
   const int                        NPROCS;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_numroc returns the local number of matrix rows/columns process
 * PROC will get if we give out N rows/columns starting from global
 * index 0. It is HPL_numrocI called with a starting index of zero.
 *
 * Arguments
 * =========
 *
 * N       (input)                       const int
 *         Number of rows/columns being dealt out.
 *
 * INB     (input)                       const int
 *         Size of the first block of the global matrix.
 *
 * NB      (input)                       const int
 *         Blocking factor used to partition and distribute the matrix.
 *
 * PROC    (input)                       const int
 *         Coordinate of the process whose local portion is determined.
 *
 * SRCPROC (input)                       const int
 *         Coordinate of the process that possesses the first row or
 *         column of the matrix.
 *
 * NPROCS  (input)                       const int
 *         Number of process rows or columns over which the matrix is
 *         distributed.
 *
 * All arguments are forwarded unchanged to HPL_numrocI.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   int                        nlocal;
/* ..
 * .. Executable Statements ..
 */
   nlocal = HPL_numrocI( N, 0, INB, NB, PROC, SRCPROC, NPROCS );

   return( nlocal );
/*
 * End of HPL_numroc
 */
}
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_numrocI ( const int N, const int I, const int INB, const int NB, const int PROC, const int SRCPROC, const int NPROCS ) #else int HPL_numrocI ( N, I, INB, NB, PROC, SRCPROC, NPROCS ) const int N; const int I; const int INB; const int NB; const int PROC; const int SRCPROC; const int NPROCS; #endif { /* * Purpose * ======= * * HPL_numrocI returns the local number of matrix rows/columns process * PROC will get if we give out N rows/columns starting from global * index I. * * Arguments * ========= * * N (input) const int * On entry, N specifies the number of rows/columns being dealt * out. 
N must be at least zero. * * I (input) const int * On entry, I specifies the global index of the matrix entry * I must be at least zero. * * INB (input) const int * On entry, INB specifies the size of the first block of th * global matrix. INB must be at least one. * * NB (input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix A. NB must be larger than one. * * PROC (input) const int * On entry, PROC specifies the coordinate of the process whos * local portion is determined. PROC must be at least zero an * strictly less than NPROCS. * * SRCPROC (input) const int * On entry, SRCPROC specifies the coordinate of the proces * that possesses the first row or column of the matrix. SRCPRO * must be at least zero and strictly less than NPROCS. * * NPROCS (input) const int * On entry, NPROCS specifies the total number of process row * or columns over which the matrix is distributed. NPROCS mus * be at least one. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int ilocblk, inb, mydist, nblocks, srcproc; /* .. * .. Executable Statements .. */ if( ( SRCPROC == -1 ) || ( NPROCS == 1 ) ) /* * The data is not distributed, or there is just one process in this di- * mension of the grid. */ return( N ); /* * Compute coordinate of process owning I and corresponding INB */ srcproc = SRCPROC; if( ( inb = INB - I ) <= 0 ) { /* * I is not in the first block, find out which process has it and update * the size of first block */ srcproc += ( nblocks = (-inb) / NB + 1 ); srcproc -= ( srcproc / NPROCS ) * NPROCS; inb += nblocks * NB; } /* * Now everything is just like N, I=0, INB, NB, srcproc, NPROCS. The * discussion goes as follows: compute my distance from the source pro- * cess so that within this process coordinate system, the source pro- * cess is the process such that mydist = 0, or PROC == srcproc. 
* * Find out how many full blocks are globally (nblocks) and locally * (ilocblk) in those N entries. Then remark that * * when mydist < nblocks - ilocblk*NPROCS, I own ilocblk+1 full blocks, * when mydist > nblocks - ilocblk*NPROCS, I own ilocblk full blocks, * when mydist = nblocks - ilocblk*NPROCS, either the last block is not * full and I own it, or the last block is full and I am the first pro- * cess owning only ilocblk full blocks. */ if( PROC == srcproc ) { /* * I am the source process, i.e. I own I (mydist=0). When N <= INB, the * answer is simply N. */ if( N <= inb ) return( N ); /* * Find out how many full blocks are globally (nblocks) and locally * (ilocblk) in those N entries. */ nblocks = ( N - inb ) / NB + 1; /* * Since mydist = 0 and nblocks - ilocblk * NPROCS >= 0, there are only * two possible cases: * * 1) When mydist = nblocks - ilocblk * NPROCS = 0, that is NPROCS di- * vides the global number of full blocks, then the source process * srcproc owns one more block than the other processes; and N can * be rewritten as N = INB + (nblocks-1) * NB + LNB with LNB >= 0 * size of the last block. Similarly, the local value Np correspon- * ding to N can be written as Np = INB + (ilocblk-1) * NB + LNB = * N + ( ilocblk-1 - (nblocks-1) )*NB. Note that this case cannot * happen when ilocblk is zero, since nblocks is at least one. * * 2) mydist = 0 < nblocks - ilocblk * NPROCS, the source process only * owns full blocks, and therefore Np = INB + ilocblk * NB. Note * that when ilocblk is zero, Np is just INB. */ if( nblocks < NPROCS ) return( inb ); ilocblk = nblocks / NPROCS; return( ( nblocks - ilocblk * NPROCS ) ? inb + ilocblk * NB : N + ( ilocblk - nblocks ) * NB ); } else { /* * I am not the source process. When N <= INB, the answer is simply 0. 
*/ if( N <= inb ) return( 0 ); /* * Find out how many full blocks are globally (nblocks) and locally * (ilocblk) in those N entries */ nblocks = ( N - inb ) / NB + 1; /* * Compute my distance from the source process so that within this pro- * cess coordinate system, the source process is the process such that * mydist=0. */ if( ( mydist = PROC - srcproc ) < 0 ) mydist += NPROCS; /* * When mydist < nblocks - ilocblk*NPROCS, I own ilocblk + 1 full blocks * of size NB since I am not the source process, * * when mydist > nblocks - ilocblk * NPROCS, I own ilocblk full blocks * of size NB since I am not the source process, * * when mydist = nblocks - ilocblk*NPROCS, * either the last block is not full and I own it, in which case * N = INB + (nblocks - 1)*NB + LNB with LNB the size of the last * block such that NB > LNB > 0; the local value Np corresponding to * N is given by Np = ilocblk*NB+LNB = N-INB+(ilocblk-nblocks+1)*NB; * or the last block is full and I am the first process owning only * ilocblk full blocks of size NB, that is N = INB+(nblocks-1)*NB and * Np = ilocblk * NB = N - INB + (ilocblk-nblocks+1) * NB. */ if( nblocks < NPROCS ) return( ( mydist < nblocks ) ? NB : ( ( mydist > nblocks ) ? 0 : N - inb + NB * ( 1 - nblocks ) ) ); ilocblk = nblocks / NPROCS; mydist -= nblocks - ilocblk * NPROCS; return( ( mydist < 0 ) ? ( ilocblk + 1 ) * NB : ( ( mydist > 0 ) ? ilocblk * NB : N - inb + NB * ( ilocblk - nblocks + 1 ) ) ); } /* * End of HPL_numrocI */ } hpcc-1.4.1/hpl/src/pauxil/HPL_pabort.c0000644000000000000000000001275011256503657014352 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pabort ( int LINE, const char * SRNAME, const char * FORM, ... ) #else void HPL_pabort( va_alist ) va_dcl #endif { /* * Purpose * ======= * * HPL_pabort displays an error message on stderr and halts execution. * * * Arguments * ========= * * LINE (local input) int * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a positive line * number, it is ignored. * * SRNAME (local input) const char * * On entry, SRNAME should be the name of the routine calling * this error handler. * * FORM (local input) const char * * On entry, FORM specifies the format, i.e., how the subsequent * arguments are converted for output. * * (local input) ... * On entry, ... is the list of arguments to be printed within * the format string. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ va_list argptr; int rank; char cline[128]; #ifndef HPL_STDC_HEADERS int LINE; char * FORM, * SRNAME; #endif /* .. * .. Executable Statements .. 
*/ #ifdef HPL_STDC_HEADERS va_start( argptr, FORM ); #else va_start( argptr ); LINE = va_arg( argptr, int ); SRNAME = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif (void) vsprintf( cline, FORM, argptr ); va_end( argptr ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); /* * Display an error message */ if( LINE <= 0 ) HPL_fprintf( stderr, "%s %s %d, %s %s:\n>>> %s <<< Abort ...\n\n", "HPL ERROR", "from process #", rank, "in function", SRNAME, cline ); else HPL_fprintf( stderr, "%s %s %d, %s %d %s %s:\n>>> %s <<< Abort ...\n\n", "HPL ERROR", "from process #", rank, "on line", LINE, "of function", SRNAME, cline ); MPI_Abort( MPI_COMM_WORLD, -1 ); exit( -1 ); /* * End of HPL_pabort */ } hpcc-1.4.1/hpl/src/pauxil/HPL_pdlamch.c0000644000000000000000000001510111256503657014464 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS double HPL_pdlamch ( MPI_Comm COMM, const HPL_T_MACH CMACH ) #else double HPL_pdlamch ( COMM, CMACH ) MPI_Comm COMM; const HPL_T_MACH CMACH; #endif { /* * Purpose * ======= * * HPL_pdlamch determines machine-specific arithmetic constants such as * the relative machine precision (eps), the safe minimum(sfmin) such that * 1/sfmin does not overflow, the base of the machine (base), the precision * (prec), the number of (base) digits in the mantissa (t), whether * rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum * exponent before (gradual) underflow (emin), the underflow threshold * (rmin)- base**(emin-1), the largest exponent before overflow (emax), the * overflow threshold (rmax) - (base**emax)*(1-eps). * * Arguments * ========= * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection. 
* * CMACH (global input) const HPL_T_MACH * Specifies the value to be returned by HPL_pdlamch * = HPL_MACH_EPS, HPL_pdlamch := eps (default) * = HPL_MACH_SFMIN, HPL_pdlamch := sfmin * = HPL_MACH_BASE, HPL_pdlamch := base * = HPL_MACH_PREC, HPL_pdlamch := eps*base * = HPL_MACH_MLEN, HPL_pdlamch := t * = HPL_MACH_RND, HPL_pdlamch := rnd * = HPL_MACH_EMIN, HPL_pdlamch := emin * = HPL_MACH_RMIN, HPL_pdlamch := rmin * = HPL_MACH_EMAX, HPL_pdlamch := emax * = HPL_MACH_RMAX, HPL_pdlamch := rmax * * where * * eps = relative machine precision, * sfmin = safe minimum, * base = base of the machine, * prec = eps*base, * t = number of digits in the mantissa, * rnd = 1.0 if rounding occurs in addition, * emin = minimum exponent before underflow, * rmin = underflow threshold, * emax = largest exponent before overflow, * rmax = overflow threshold. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double param; /* .. * .. Executable Statements .. */ param = HPL_dlamch( CMACH ); switch( CMACH ) { case HPL_MACH_EPS : case HPL_MACH_SFMIN : case HPL_MACH_EMIN : case HPL_MACH_RMIN : (void) HPL_all_reduce( (void *)(¶m), 1, HPL_DOUBLE, HPL_max, COMM ); break; case HPL_MACH_EMAX : case HPL_MACH_RMAX : (void) HPL_all_reduce( (void *)(¶m), 1, HPL_DOUBLE, HPL_min, COMM ); break; default : break; } return( param ); /* * End of HPL_pdlamch */ } hpcc-1.4.1/hpl/src/pauxil/HPL_pdlange.c0000644000000000000000000002170611256503657014476 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS double HPL_pdlange ( const HPL_T_grid * GRID, const HPL_T_NORM NORM, const int M, const int N, const int NB, const double * A, const int LDA ) #else double HPL_pdlange ( GRID, NORM, M, N, NB, A, LDA ) const HPL_T_grid * GRID; const HPL_T_NORM NORM; const int M; const int N; const int NB; const double * A; const int LDA; #endif { /* * Purpose * ======= * * HPL_pdlange returns the value of the one norm, or the infinity norm, * or the element of largest absolute value of a distributed matrix A: * * * max(abs(A(i,j))) when NORM = HPL_NORM_A, * norm1(A), when NORM = HPL_NORM_1, * normI(A), when NORM = HPL_NORM_I, * * where norm1 denotes the one norm of a matrix (maximum column sum) and * normI denotes the infinity norm of a matrix (maximum row sum). Note * that max(abs(A(i,j))) is not a matrix norm. * * Arguments * ========= * * GRID (local input) const HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * NORM (global input) const HPL_T_NORM * On entry, NORM specifies the value to be returned by this * function as described above. * * M (global input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (global input) const int * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * NB (global input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix. NB must be larger than one. * * A (local input) const double * * On entry, A points to an array of dimension (LDA,LocQ(N)), * that contains the local pieces of the distributed matrix A. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max(1,LocP(M)). 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double s, v0=HPL_rzero, * work = NULL; MPI_Comm Acomm, Ccomm, Rcomm; int ii, jj, mp, mycol, myrow, npcol, nprow, nq; /* .. * .. Executable Statements .. */ (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); Rcomm = GRID->row_comm; Ccomm = GRID->col_comm; Acomm = GRID->all_comm; Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); if( Mmin( M, N ) == 0 ) { return( v0 ); } else if( NORM == HPL_NORM_A ) { /* * max( abs( A ) ) */ if( ( nq > 0 ) && ( mp > 0 ) ) { for( jj = 0; jj < nq; jj++ ) { for( ii = 0; ii < mp; ii++ ) { v0 = Mmax( v0, Mabs( *A ) ); A++; } A += LDA - mp; } } (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, Acomm ); } else if( NORM == HPL_NORM_1 ) { /* * Find norm_1( A ). */ if( nq > 0 ) { work = (double*)malloc( (size_t)(nq) * sizeof( double ) ); if( work == NULL ) { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } for( jj = 0; jj < nq; jj++ ) { s = HPL_rzero; for( ii = 0; ii < mp; ii++ ) { s += Mabs( *A ); A++; } work[jj] = s; A += LDA - mp; } /* * Find sum of global matrix columns, store on row 0 of process grid */ (void) HPL_reduce( (void *)(work), nq, HPL_DOUBLE, HPL_sum, 0, Ccomm ); /* * Find maximum sum of columns for 1-norm */ if( myrow == 0 ) { v0 = work[HPL_idamax( nq, work, 1 )]; v0 = Mabs( v0 ); } if( work ) free( work ); } /* * Find max in row 0, store result in process (0,0) */ if( myrow == 0 ) (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, Rcomm ); } else if( NORM == HPL_NORM_I ) { /* * Find norm_inf( A ) */ if( mp > 0 ) { work = (double*)malloc( (size_t)(mp) * sizeof( double ) ); if( work == NULL ) { HPL_pabort( __LINE__, "HPL_pdlange", "Memory allocation failed" ); } for( ii = 0; ii < mp; ii++ ) { work[ii] = HPL_rzero; } for( jj = 0; jj < nq; jj++ ) { for( ii = 0; ii < mp; ii++ ) { work[ii] += Mabs( *A ); A++; } A += LDA - mp; } /* * Find 
sum of global matrix rows, store on column 0 of process grid */ (void) HPL_reduce( (void *)(work), mp, HPL_DOUBLE, HPL_sum, 0, Rcomm ); /* * Find maximum sum of rows for inf-norm */ if( mycol == 0 ) { v0 = work[HPL_idamax( mp, work, 1 )]; v0 = Mabs( v0 ); } if( work ) free( work ); } /* * Find max in column 0, store result in process (0,0) */ if( mycol == 0 ) (void) HPL_reduce( (void *)(&v0), 1, HPL_DOUBLE, HPL_max, 0, Ccomm ); } /* * Broadcast answer to every process in the grid */ (void) HPL_broadcast( (void *)(&v0), 1, HPL_DOUBLE, 0, Acomm ); return( v0 ); /* * End of HPL_pdlange */ } hpcc-1.4.1/hpl/src/pauxil/HPL_pdlaprnt.c0000644000000000000000000001777311256503657014721 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdlaprnt ( const HPL_T_grid * GRID, const int M, const int N, const int NB, double * A, const int LDA, const int IAROW, const int IACOL, const char * CMATNM ) #else void HPL_pdlaprnt ( GRID, M, N, NB, A, LDA, IAROW, IACOL, CMATNM ) const HPL_T_grid * GRID; const int M; const int N; const int NB; double * A; const int LDA; const int IAROW; const int IACOL; const char * CMATNM; #endif { /* * Purpose * ======= * * HPL_pdlaprnt prints to standard error a distributed matrix A. The * local pieces of A are sent to the process of coordinates (0,0) in * the grid and then printed. * * Arguments * ========= * * GRID (local input) const HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * M (global input) const int * On entry, M specifies the number of rows of the coefficient * matrix A. M must be at least zero. * * N (global input) const int * On entry, N specifies the number of columns of the * coefficient matrix A. N must be at least zero. 
* * NB (global input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix. NB must be larger than one. * * A (local input) double * * On entry, A points to an array of dimension (LDA,LocQ(N)). * This array contains the coefficient matrix to be printed. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max(1,LocP(M)). * * IAROW (global input) const int * On entry, IAROW specifies the row process coordinate owning * the first row of A. IAROW must be larger than or equal to * zero and less than NPROW. * * IACOL (global input) const int * On entry, IACOL specifies the column process coordinate * owning the first column of A. IACOL must be larger than or * equal to zero and less than NPCOL. * * CMATNM (global input) const char * * On entry, CMATNM is the name of the matrix to be printed. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ MPI_Comm Acomm; double * buf = NULL; int h, i, ib, icurcol=IACOL, icurrow=IAROW, ii=0, j, jb, jj=0, mycol, myrow, npcol, nprow, src; /* .. * .. Executable Statements .. 
*/ (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); Acomm = GRID->all_comm; if( ( myrow == 0 ) && ( mycol == 0 ) ) buf = (double*)malloc( (size_t)(NB) * sizeof( double ) ); for( j = 0; j < N; j += NB ) { jb = N-j; jb = Mmin( jb, NB ); for( h = 0; h < jb; h++ ) { (void) HPL_barrier( Acomm ); for( i = 0; i < M; i += NB ) { ib = M-i; ib = Mmin( ib, NB ); if( ( icurrow == 0 ) && ( icurcol == 0 ) ) { if( ( myrow == 0 ) && ( mycol == 0 ) ) HPL_dlaprnt( ib, 1, Mptr( A, ii, jj+h, LDA ), i+1, j+h+1, LDA, CMATNM ); } else { if( ( myrow == icurrow ) && ( mycol == icurcol ) ) { (void) HPL_send( Mptr( A, ii, jj+h, LDA ), ib, 0, 9000+(j+h)*M+i, Acomm ); } else if( ( myrow == 0 ) && ( mycol == 0 ) ) { src = HPL_pnum( GRID, icurrow, icurcol ); (void) HPL_recv( buf, ib, src, 9000+(j+h)*M+i, Acomm ); HPL_dlaprnt( ib, 1, buf, i+1, j+h+1, NB, CMATNM ); } } if( myrow == icurrow ) ii += ib; icurrow = MModAdd1( icurrow, nprow ); (void) HPL_barrier( Acomm ); } ii = 0; icurrow = IAROW; } if( mycol == icurcol ) jj += jb; icurcol = MModAdd1( icurcol, npcol ); (void) HPL_barrier( Acomm ); } if( ( myrow == 0 ) && ( mycol == 0 ) && ( buf ) ) free( buf ); /* * End of HPL_pdlaprnt */ } hpcc-1.4.1/hpl/src/pauxil/HPL_pwarn.c0000644000000000000000000001347211353467335014214 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pwarn ( FILE * STREAM, int LINE, const char * SRNAME, const char * FORM, ... ) #else void HPL_pwarn( va_alist ) va_dcl #endif { /* * Purpose * ======= * * HPL_pwarn displays an error message. * * * Arguments * ========= * * STREAM (local input) FILE * * On entry, STREAM specifies the output stream. 
* * LINE (local input) int * On entry, LINE specifies the line number in the file where * the error has occured. When LINE is not a positive line * number, it is ignored. * * SRNAME (local input) const char * * On entry, SRNAME should be the name of the routine calling * this error handler. * * FORM (local input) const char * * On entry, FORM specifies the format, i.e., how the subsequent * arguments are converted for output. * * (local input) ... * On entry, ... is the list of arguments to be printed within * the format string. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ va_list argptr; int rank; char cline[128]; #ifndef HPL_STDC_HEADERS FILE * STREAM; int LINE; char * FORM, * SRNAME; #endif /* .. * .. Executable Statements .. */ #ifdef HPL_STDC_HEADERS va_start( argptr, FORM ); #else va_start( argptr ); STREAM = va_arg( argptr, FILE * ); LINE = va_arg( argptr, int ); SRNAME = va_arg( argptr, char * ); FORM = va_arg( argptr, char * ); #endif (void) vsprintf( cline, FORM, argptr ); va_end( argptr ); MPI_Comm_rank( MPI_COMM_WORLD, &rank ); /* * Display an error message */ if( LINE <= 0 ) HPL_fprintf( STREAM, "%s %s %d, %s %s:\n>>> %s <<<\n\n", "HPL ERROR", "from process #", rank, "in function", SRNAME, cline ); else if( LINE > (1 << 30) ) HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", "HPL WARNING", "from process #", rank, "on line", LINE - (1 << 30), "of function", SRNAME, cline ); else HPL_fprintf( STREAM, "%s %s %d, %s %d %s %s:\n>>> %s <<<\n\n", "HPL ERROR", "from process #", rank, "on line", LINE, "of function", SRNAME, cline ); /* * End of HPL_pwarn */ } hpcc-1.4.1/hpl/src/pfact/HPL_dlocmax.c0000644000000000000000000001474411256503657014312 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_dlocmax ( HPL_T_panel * PANEL, const int N, const int II, const int JJ, double * WORK ) #else void HPL_dlocmax ( PANEL, N, II, JJ, WORK ) HPL_T_panel * PANEL; const int N; const int II; const int JJ; double * WORK; #endif { /* * Purpose * ======= * * HPL_dlocmax finds the maximum entry in the current column and packs * the useful information in WORK[0:3]. On exit, WORK[0] contains the * local maximum absolute value scalar, WORK[1] is the corresponding * local row index, WORK[2] is the corresponding global row index, and * WORK[3] is the coordinate of the process owning this max. When N is * less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set * to the total number of process rows. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * N (local input) const int * On entry, N specifies the local number of rows of the column * of A on which we operate. * * II (local input) const int * On entry, II specifies the row offset where the column to be * operated on starts with respect to the panel. * * JJ (local input) const int * On entry, JJ specifies the column offset where the column to * be operated on starts with respect to the panel. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 4. On exit, * WORK[0] contains the local maximum absolute value scalar, * WORK[1] contains the corresponding local row index, WORK[2] * contains the corresponding global row index, and WORK[3] is * the coordinate of process owning this max. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A; int kk, igindx, ilindx, myrow, nb, nprow; /* .. * .. Executable Statements .. 
*/ if( N > 0 ) { A = Mptr( PANEL->A, II, JJ, PANEL->lda ); myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; nb = PANEL->nb; kk = PANEL->ii + II + ( ilindx = HPL_idamax( N, A, 1 ) ); Mindxl2g( igindx, kk, nb, nb, myrow, 0, nprow ); /* * WORK[0] := local maximum absolute value scalar, * WORK[1] := corresponding local row index, * WORK[2] := corresponding global row index, * WORK[3] := coordinate of process owning this max. */ WORK[0] = A[ilindx]; WORK[1] = (double)(ilindx); WORK[2] = (double)(igindx); WORK[3] = (double)(myrow); } else { /* * If I do not have any row of A, then set the coordinate of the process * (WORK[3]) owning this "ghost" row, such that it will never be used, * even if there are only zeros in the current column of A. */ WORK[0] = WORK[1] = WORK[2] = HPL_rzero; WORK[3] = (double)(PANEL->grid->nprow); } /* * End of HPL_dlocmax */ } hpcc-1.4.1/hpl/src/pfact/HPL_dlocswpN.c0000644000000000000000000004255611256503657014456 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"
/*
 * Define default value for unrolling factor.
 *
 * The two macros must stay consistent: the code below computes the
 * unrolled trip count with shifts by HPL_LOCSWP_LOG2_DEPTH and steps by
 * HPL_LOCSWP_DEPTH. A build that predefines HPL_LOCSWP_DEPTH skips this
 * whole block and must therefore also provide HPL_LOCSWP_LOG2_DEPTH.
 * The #if ladders in the loops emit 1, 2, 4, 8, 16 or 32 statements,
 * i.e. they cover power-of-two depths up to 32.
 */
#ifndef HPL_LOCSWP_DEPTH
#define    HPL_LOCSWP_DEPTH        32
#define    HPL_LOCSWP_LOG2_DEPTH    5
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlocswpN
(
   HPL_T_panel *                    PANEL,
   const int                        II,
   const int                        JJ,
   double *                         WORK
)
#else
void HPL_dlocswpN
( PANEL, II, JJ, WORK )
   HPL_T_panel *                    PANEL;
   const int                        II;
   const int                        JJ;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlocswpN performs  the local swapping operations  within a panel.
 * The lower triangular  N0-by-N0  upper block of the panel is stored in
 * no-transpose form (i.e. just like the input matrix itself).
 *
 * Depending on which process row owns the current row and the max
 * (pivot) row, the routine copies the max row into L1, writes it over
 * the current row of A, and/or writes the saved current row over the
 * max row of A. When the pivot value WORK[0] is zero, the current row
 * is copied into L1 instead and *PANEL->DINFO is set (if still 0.0) to
 * PANEL->ia + JJ + 1.
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information.
 *
 * II      (local input)                 const int
 *         On entry, II  specifies the row offset where the column to be
 *         operated on starts with respect to the panel.
 *
 * JJ      (local input)                 const int
 *         On entry, JJ  specifies the column offset where the column to
 *         be operated on starts with respect to the panel.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
 *         WORK[0] contains  the  local  maximum  absolute value scalar,
 *         WORK[1] contains  the corresponding local row index,  WORK[2]
 *         contains the corresponding global row index, and  WORK[3]  is
 *         the coordinate of process owning this max.  The N0 length max
 *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
 *         JJth row  (or column) of L1. The remaining part of this array
 *         is used as workspace. This routine reads the current row from
 *         WORK[4+N0:4+2*N0-1] (pointer Wr0 below).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 *
 * gmax    pivot value taken from WORK[0]
 * A1, A2  walk the current row and the max row of PANEL->A
 * L       walks the destination entries in PANEL->L1
 * Wmx     max row buffer (WORK + 4), Wr0 current row buffer (Wmx + n0)
 * nu, nr  number of columns handled by the unrolled loop / remainder
 */
   double                     gmax;
   double                     * A1, * A2, * L, * Wr0, * Wmx;
   int                        ilindx, lda, myrow, n0, nr, nu;
   register int               i;
/* ..
 * .. Executable Statements ..
 */
   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;

/* Wmx -> WORK[4], Wr0 -> WORK[4+n0]; entry JJ of the max row is forced
 * to the pivot value WORK[0]. */
   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
/* nu = n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH,
 * nr = leftover columns handled by the plain clean-up loops. */
   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) <<
                  HPL_LOCSWP_LOG2_DEPTH );
   nr    = n0 - nu;
/*
 * Replicated swap and copy of the current (new) row of A into L1
 *
 * NOTE(review): Mptr is defined elsewhere; presumably Mptr(a,i,j,ld)
 * addresses element (i,j) of a column-major array with leading
 * dimension ld, so L starts at row JJ of L1 and advances by n0 (one
 * column) per entry - confirm against the macro definition.
 */
   L     = Mptr( PANEL->L1, JJ, 0, n0 );
/*
 * If the pivot is non-zero ...
 */
   if( gmax != HPL_rzero )
   {
/*
 * and if I own the current row of A ...
 */
      if( myrow == PANEL->prow )
      {
/*
 * and if I also own the row to be swapped with the current row of A ...
 */
         if( myrow == (int)(WORK[3]) )
         {
/*
 * and if the current row of A is not to be swapped with itself ...
 */
            if( ( ilindx = (int)(WORK[1]) ) != 0 )
            {
/*
 * then copy the max row into L1 and locally swap the 2 rows of A.
 * Per column: L1 and the current row (A1) receive Wmx[.], the max row
 * (A2, ilindx rows below A1) receives Wr0[.].
 */
               A1 = Mptr( PANEL->A, II,     0, lda );
               A2 = Mptr( A1,       ilindx, 0, lda );

               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
               {
                  *L=*A1=Wmx[ 0]; *A2=Wr0[ 0]; L+=n0; A1+=lda; A2+=lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
                  *L=*A1=Wmx[ 1]; *A2=Wr0[ 1]; L+=n0; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
                  *L=*A1=Wmx[ 2]; *A2=Wr0[ 2]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[ 3]; *A2=Wr0[ 3]; L+=n0; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
                  *L=*A1=Wmx[ 4]; *A2=Wr0[ 4]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[ 5]; *A2=Wr0[ 5]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[ 6]; *A2=Wr0[ 6]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[ 7]; *A2=Wr0[ 7]; L+=n0; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
                  *L=*A1=Wmx[ 8]; *A2=Wr0[ 8]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[ 9]; *A2=Wr0[ 9]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[10]; *A2=Wr0[10]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[11]; *A2=Wr0[11]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[12]; *A2=Wr0[12]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[13]; *A2=Wr0[13]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[14]; *A2=Wr0[14]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[15]; *A2=Wr0[15]; L+=n0; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
                  *L=*A1=Wmx[16]; *A2=Wr0[16]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[17]; *A2=Wr0[17]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[18]; *A2=Wr0[18]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[19]; *A2=Wr0[19]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[20]; *A2=Wr0[20]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[21]; *A2=Wr0[21]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[22]; *A2=Wr0[22]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[23]; *A2=Wr0[23]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[24]; *A2=Wr0[24]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[25]; *A2=Wr0[25]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[26]; *A2=Wr0[26]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[27]; *A2=Wr0[27]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[28]; *A2=Wr0[28]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[29]; *A2=Wr0[29]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[30]; *A2=Wr0[30]; L+=n0; A1+=lda; A2+=lda;
                  *L=*A1=Wmx[31]; *A2=Wr0[31]; L+=n0; A1+=lda; A2+=lda;
#endif
               }

/* clean-up loop for the nr columns not covered by the unrolled loop;
 * Wmx / Wr0 already point past the nu columns handled above. */
               for( i = 0; i < nr; i++, L += n0, A1 += lda, A2 += lda )
               { *L = *A1 = Wmx[i]; *A2 = Wr0[i]; }
            }
            else
            {
/*
 * otherwise the current row of  A  is swapped with itself, so just copy
 * the current row of A into L1. Only entry (II,JJ) of A is rewritten
 * (with gmax). Wr0 is advanced by the loop header but is not read in
 * this branch.
 */
               *Mptr( PANEL->A, II, JJ, lda ) = gmax;

               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH )
               {
                  *L = Wmx[ 0]; L+=n0;
#if ( HPL_LOCSWP_DEPTH > 1 )
                  *L = Wmx[ 1]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
                  *L = Wmx[ 2]; L+=n0;
                  *L = Wmx[ 3]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
                  *L = Wmx[ 4]; L+=n0;
                  *L = Wmx[ 5]; L+=n0;
                  *L = Wmx[ 6]; L+=n0;
                  *L = Wmx[ 7]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
                  *L = Wmx[ 8]; L+=n0;
                  *L = Wmx[ 9]; L+=n0;
                  *L = Wmx[10]; L+=n0;
                  *L = Wmx[11]; L+=n0;
                  *L = Wmx[12]; L+=n0;
                  *L = Wmx[13]; L+=n0;
                  *L = Wmx[14]; L+=n0;
                  *L = Wmx[15]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
                  *L = Wmx[16]; L+=n0;
                  *L = Wmx[17]; L+=n0;
                  *L = Wmx[18]; L+=n0;
                  *L = Wmx[19]; L+=n0;
                  *L = Wmx[20]; L+=n0;
                  *L = Wmx[21]; L+=n0;
                  *L = Wmx[22]; L+=n0;
                  *L = Wmx[23]; L+=n0;
                  *L = Wmx[24]; L+=n0;
                  *L = Wmx[25]; L+=n0;
                  *L = Wmx[26]; L+=n0;
                  *L = Wmx[27]; L+=n0;
                  *L = Wmx[28]; L+=n0;
                  *L = Wmx[29]; L+=n0;
                  *L = Wmx[30]; L+=n0;
                  *L = Wmx[31]; L+=n0;
#endif
               }

/* remainder columns */
               for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
            }
         }
         else
         {
/*
 * otherwise, the row to be swapped with the current row of A is in Wmx,
 * so copy Wmx into L1 and A.
 */
            A1 = Mptr( PANEL->A, II, 0, lda );

            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                 Wmx += HPL_LOCSWP_DEPTH )
            {
               *L = *A1 = Wmx[ 0]; L += n0; A1 += lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
               *L = *A1 = Wmx[ 1]; L += n0; A1 += lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
               *L = *A1 = Wmx[ 2]; L += n0; A1 += lda;
               *L = *A1 = Wmx[ 3]; L += n0; A1 += lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
               *L = *A1 = Wmx[ 4]; L += n0; A1 += lda;
               *L = *A1 = Wmx[ 5]; L += n0; A1 += lda;
               *L = *A1 = Wmx[ 6]; L += n0; A1 += lda;
               *L = *A1 = Wmx[ 7]; L += n0; A1 += lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
               *L = *A1 = Wmx[ 8]; L += n0; A1 += lda;
               *L = *A1 = Wmx[ 9]; L += n0; A1 += lda;
               *L = *A1 = Wmx[10]; L += n0; A1 += lda;
               *L = *A1 = Wmx[11]; L += n0; A1 += lda;
               *L = *A1 = Wmx[12]; L += n0; A1 += lda;
               *L = *A1 = Wmx[13]; L += n0; A1 += lda;
               *L = *A1 = Wmx[14]; L += n0; A1 += lda;
               *L = *A1 = Wmx[15]; L += n0; A1 += lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
               *L = *A1 = Wmx[16]; L += n0; A1 += lda;
               *L = *A1 = Wmx[17]; L += n0; A1 += lda;
               *L = *A1 = Wmx[18]; L += n0; A1 += lda;
               *L = *A1 = Wmx[19]; L += n0; A1 += lda;
               *L = *A1 = Wmx[20]; L += n0; A1 += lda;
               *L = *A1 = Wmx[21]; L += n0; A1 += lda;
               *L = *A1 = Wmx[22]; L += n0; A1 += lda;
               *L = *A1 = Wmx[23]; L += n0; A1 += lda;
               *L = *A1 = Wmx[24]; L += n0; A1 += lda;
               *L = *A1 = Wmx[25]; L += n0; A1 += lda;
               *L = *A1 = Wmx[26]; L += n0; A1 += lda;
               *L = *A1 = Wmx[27]; L += n0; A1 += lda;
               *L = *A1 = Wmx[28]; L += n0; A1 += lda;
               *L = *A1 = Wmx[29]; L += n0; A1 += lda;
               *L = *A1 = Wmx[30]; L += n0; A1 += lda;
               *L = *A1 = Wmx[31]; L += n0; A1 += lda;
#endif
            }

/* remainder columns */
            for( i = 0; i < nr; i++, L += n0, A1 += lda )
            { *L = *A1 = Wmx[i]; }
         }
      }
      else
      {
/*
 * otherwise I do not own the current row of A, so copy the max row  Wmx
 * into L1.
 */
         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
              Wmx += HPL_LOCSWP_DEPTH )
         {
            *L = Wmx[ 0]; L+=n0;
#if ( HPL_LOCSWP_DEPTH > 1 )
            *L = Wmx[ 1]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
            *L = Wmx[ 2]; L+=n0;
            *L = Wmx[ 3]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
            *L = Wmx[ 4]; L+=n0;
            *L = Wmx[ 5]; L+=n0;
            *L = Wmx[ 6]; L+=n0;
            *L = Wmx[ 7]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
            *L = Wmx[ 8]; L+=n0;
            *L = Wmx[ 9]; L+=n0;
            *L = Wmx[10]; L+=n0;
            *L = Wmx[11]; L+=n0;
            *L = Wmx[12]; L+=n0;
            *L = Wmx[13]; L+=n0;
            *L = Wmx[14]; L+=n0;
            *L = Wmx[15]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
            *L = Wmx[16]; L+=n0;
            *L = Wmx[17]; L+=n0;
            *L = Wmx[18]; L+=n0;
            *L = Wmx[19]; L+=n0;
            *L = Wmx[20]; L+=n0;
            *L = Wmx[21]; L+=n0;
            *L = Wmx[22]; L+=n0;
            *L = Wmx[23]; L+=n0;
            *L = Wmx[24]; L+=n0;
            *L = Wmx[25]; L+=n0;
            *L = Wmx[26]; L+=n0;
            *L = Wmx[27]; L+=n0;
            *L = Wmx[28]; L+=n0;
            *L = Wmx[29]; L+=n0;
            *L = Wmx[30]; L+=n0;
            *L = Wmx[31]; L+=n0;
#endif
         }

/* remainder columns */
         for( i = 0; i < nr; i++, L += n0 ) { *L = Wmx[i]; }
/*
 * and if I own the max row, overwrite it with the current row Wr0.
 */
         if( myrow == (int)(WORK[3]) )
         {
/* WORK[1] is the local index of the max row relative to II. */
            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );

            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                 Wr0 += HPL_LOCSWP_DEPTH )
            {
               *A2 = Wr0[ 0]; A2+=lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
               *A2 = Wr0[ 1]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
               *A2 = Wr0[ 2]; A2+=lda;
               *A2 = Wr0[ 3]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
               *A2 = Wr0[ 4]; A2+=lda;
               *A2 = Wr0[ 5]; A2+=lda;
               *A2 = Wr0[ 6]; A2+=lda;
               *A2 = Wr0[ 7]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
               *A2 = Wr0[ 8]; A2+=lda;
               *A2 = Wr0[ 9]; A2+=lda;
               *A2 = Wr0[10]; A2+=lda;
               *A2 = Wr0[11]; A2+=lda;
               *A2 = Wr0[12]; A2+=lda;
               *A2 = Wr0[13]; A2+=lda;
               *A2 = Wr0[14]; A2+=lda;
               *A2 = Wr0[15]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
               *A2 = Wr0[16]; A2+=lda;
               *A2 = Wr0[17]; A2+=lda;
               *A2 = Wr0[18]; A2+=lda;
               *A2 = Wr0[19]; A2+=lda;
               *A2 = Wr0[20]; A2+=lda;
               *A2 = Wr0[21]; A2+=lda;
               *A2 = Wr0[22]; A2+=lda;
               *A2 = Wr0[23]; A2+=lda;
               *A2 = Wr0[24]; A2+=lda;
               *A2 = Wr0[25]; A2+=lda;
               *A2 = Wr0[26]; A2+=lda;
               *A2 = Wr0[27]; A2+=lda;
               *A2 = Wr0[28]; A2+=lda;
               *A2 = Wr0[29]; A2+=lda;
               *A2 = Wr0[30]; A2+=lda;
               *A2 = Wr0[31]; A2+=lda;
#endif
            }

/* remainder columns */
            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
         }
      }
   }
   else
   {
/*
 * Otherwise the max element in the current column is zero,  simply copy
 * the current row Wr0 into L1. The matrix is singular.
 */
      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
           Wr0 += HPL_LOCSWP_DEPTH )
      {
         *L = Wr0[ 0]; L+=n0;
#if ( HPL_LOCSWP_DEPTH > 1 )
         *L = Wr0[ 1]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
         *L = Wr0[ 2]; L+=n0;
         *L = Wr0[ 3]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
         *L = Wr0[ 4]; L+=n0;
         *L = Wr0[ 5]; L+=n0;
         *L = Wr0[ 6]; L+=n0;
         *L = Wr0[ 7]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
         *L = Wr0[ 8]; L+=n0;
         *L = Wr0[ 9]; L+=n0;
         *L = Wr0[10]; L+=n0;
         *L = Wr0[11]; L+=n0;
         *L = Wr0[12]; L+=n0;
         *L = Wr0[13]; L+=n0;
         *L = Wr0[14]; L+=n0;
         *L = Wr0[15]; L+=n0;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
         *L = Wr0[16]; L+=n0;
         *L = Wr0[17]; L+=n0;
         *L = Wr0[18]; L+=n0;
         *L = Wr0[19]; L+=n0;
         *L = Wr0[20]; L+=n0;
         *L = Wr0[21]; L+=n0;
         *L = Wr0[22]; L+=n0;
         *L = Wr0[23]; L+=n0;
         *L = Wr0[24]; L+=n0;
         *L = Wr0[25]; L+=n0;
         *L = Wr0[26]; L+=n0;
         *L = Wr0[27]; L+=n0;
         *L = Wr0[28]; L+=n0;
         *L = Wr0[29]; L+=n0;
         *L = Wr0[30]; L+=n0;
         *L = Wr0[31]; L+=n0;
#endif
      }

/* remainder columns */
      for( i = 0; i < nr; i++, L += n0 ) { *L = Wr0[i]; }
/*
 * set INFO: record the 1-based position PANEL->ia + JJ + 1 of the zero
 * pivot, but only if no earlier one has been recorded (DINFO still 0).
 */
      if( *(PANEL->DINFO) == 0.0 )
         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
   }
/*
 * End of HPL_dlocswpN
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_dlocswpT.c0000644000000000000000000003771011256503657014460 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P. Petitet
 * University of Tennessee, Knoxville
 * Innovative Computing Laboratory
 * (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3.
All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"
/*
 * Define default value for unrolling factor.
 *
 * The two macros must stay consistent: the code below computes the
 * unrolled trip count with shifts by HPL_LOCSWP_LOG2_DEPTH and steps by
 * HPL_LOCSWP_DEPTH. A build that predefines HPL_LOCSWP_DEPTH skips this
 * whole block and must therefore also provide HPL_LOCSWP_LOG2_DEPTH.
 * The #if ladders in the loops emit 1, 2, 4, 8, 16 or 32 statements,
 * i.e. they cover power-of-two depths up to 32.
 */
#ifndef HPL_LOCSWP_DEPTH
#define    HPL_LOCSWP_DEPTH        32
#define    HPL_LOCSWP_LOG2_DEPTH    5
#endif

#ifdef HPL_STDC_HEADERS
void HPL_dlocswpT
(
   HPL_T_panel *                    PANEL,
   const int                        II,
   const int                        JJ,
   double *                         WORK
)
#else
void HPL_dlocswpT
( PANEL, II, JJ, WORK )
   HPL_T_panel *                    PANEL;
   const int                        II;
   const int                        JJ;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_dlocswpT performs  the local swapping operations  within a panel.
 * The lower triangular  N0-by-N0  upper block of the panel is stored in
 * transpose form.
 *
 * Depending on which process row owns the current row and the max
 * (pivot) row, the routine copies the max row into L1, writes it over
 * the current row of A, and/or writes the saved current row over the
 * max row of A. When the pivot value WORK[0] is zero, the current row
 * is copied into L1 instead and *PANEL->DINFO is set (if still 0.0) to
 * PANEL->ia + JJ + 1. Unlike the no-transpose variant, the L1
 * destination is written with unit stride (L[0], L[1], ...).
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information.
 *
 * II      (local input)                 const int
 *         On entry, II  specifies the row offset where the column to be
 *         operated on starts with respect to the panel.
 *
 * JJ      (local input)                 const int
 *         On entry, JJ  specifies the column offset where the column to
 *         be operated on starts with respect to the panel.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
 *         WORK[0] contains  the  local  maximum  absolute value scalar,
 *         WORK[1] contains  the corresponding local row index,  WORK[2]
 *         contains the corresponding global row index, and  WORK[3]  is
 *         the coordinate of process owning this max.  The N0 length max
 *         row is stored in WORK[4:4+N0-1];  Note  that this is also the
 *         JJth row  (or column) of L1. The remaining part of this array
 *         is used as workspace. This routine reads the current row from
 *         WORK[4+N0:4+2*N0-1] (pointer Wr0 below).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 *
 * gmax    pivot value taken from WORK[0]
 * A1, A2  walk the current row and the max row of PANEL->A
 * L       walks the destination entries in PANEL->L1
 * Wmx     max row buffer (WORK + 4), Wr0 current row buffer (Wmx + n0)
 * nu, nr  number of columns handled by the unrolled loop / remainder
 */
   double                     gmax;
   double                     * A1, * A2, * L, * Wr0, * Wmx;
   int                        ilindx, lda, myrow, n0, nr, nu;
   register int               i;
/* ..
 * .. Executable Statements ..
 */
   myrow = PANEL->grid->myrow; n0 = PANEL->jb; lda = PANEL->lda;

/* Wmx -> WORK[4], Wr0 -> WORK[4+n0]; entry JJ of the max row is forced
 * to the pivot value WORK[0]. */
   Wr0   = ( Wmx = WORK + 4 ) + n0; Wmx[JJ] = gmax = WORK[0];
/* nu = n0 rounded down to a multiple of 2^HPL_LOCSWP_LOG2_DEPTH,
 * nr = leftover columns handled by the plain clean-up loops. */
   nu    = (int)( ( (unsigned int)(n0) >> HPL_LOCSWP_LOG2_DEPTH ) <<
                  HPL_LOCSWP_LOG2_DEPTH );
   nr    = n0 - nu;
/*
 * Replicated swap and copy of the current (new) row of A into L1
 *
 * NOTE(review): Mptr is defined elsewhere; presumably Mptr(a,i,j,ld)
 * addresses element (i,j) of a column-major array with leading
 * dimension ld, so L starts at column JJ of L1 and its n0 entries are
 * contiguous - confirm against the macro definition.
 */
   L     = Mptr( PANEL->L1, 0, JJ, n0 );
/*
 * If the pivot is non-zero ...
 */
   if( gmax != HPL_rzero )
   {
/*
 * and if I own the current row of A ...
 */
      if( myrow == PANEL->prow )
      {
/*
 * and if I also own the row to be swapped with the current row of A ...
 */
         if( myrow == (int)(WORK[3]) )
         {
/*
 * and if the current row of A is not to be swapped with itself ...
 */
            if( ( ilindx = (int)(WORK[1]) ) != 0 )
            {
/*
 * then copy the max row into L1 and locally swap the 2 rows of A.
 * Per column: L1 and the current row (A1) receive Wmx[.], the max row
 * (A2, ilindx rows below A1) receives Wr0[.].
 */
               A1 = Mptr( PANEL->A, II,     0, lda );
               A2 = Mptr( A1,       ilindx, 0, lda );

               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                    Wmx += HPL_LOCSWP_DEPTH, Wr0 += HPL_LOCSWP_DEPTH,
                    L += HPL_LOCSWP_DEPTH )
               {
                  L[ 0]=*A1=Wmx[ 0]; *A2=Wr0[ 0]; A1+=lda; A2+=lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
                  L[ 1]=*A1=Wmx[ 1]; *A2=Wr0[ 1]; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
                  L[ 2]=*A1=Wmx[ 2]; *A2=Wr0[ 2]; A1+=lda; A2+=lda;
                  L[ 3]=*A1=Wmx[ 3]; *A2=Wr0[ 3]; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
                  L[ 4]=*A1=Wmx[ 4]; *A2=Wr0[ 4]; A1+=lda; A2+=lda;
                  L[ 5]=*A1=Wmx[ 5]; *A2=Wr0[ 5]; A1+=lda; A2+=lda;
                  L[ 6]=*A1=Wmx[ 6]; *A2=Wr0[ 6]; A1+=lda; A2+=lda;
                  L[ 7]=*A1=Wmx[ 7]; *A2=Wr0[ 7]; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
                  L[ 8]=*A1=Wmx[ 8]; *A2=Wr0[ 8]; A1+=lda; A2+=lda;
                  L[ 9]=*A1=Wmx[ 9]; *A2=Wr0[ 9]; A1+=lda; A2+=lda;
                  L[10]=*A1=Wmx[10]; *A2=Wr0[10]; A1+=lda; A2+=lda;
                  L[11]=*A1=Wmx[11]; *A2=Wr0[11]; A1+=lda; A2+=lda;
                  L[12]=*A1=Wmx[12]; *A2=Wr0[12]; A1+=lda; A2+=lda;
                  L[13]=*A1=Wmx[13]; *A2=Wr0[13]; A1+=lda; A2+=lda;
                  L[14]=*A1=Wmx[14]; *A2=Wr0[14]; A1+=lda; A2+=lda;
                  L[15]=*A1=Wmx[15]; *A2=Wr0[15]; A1+=lda; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
                  L[16]=*A1=Wmx[16]; *A2=Wr0[16]; A1+=lda; A2+=lda;
                  L[17]=*A1=Wmx[17]; *A2=Wr0[17]; A1+=lda; A2+=lda;
                  L[18]=*A1=Wmx[18]; *A2=Wr0[18]; A1+=lda; A2+=lda;
                  L[19]=*A1=Wmx[19]; *A2=Wr0[19]; A1+=lda; A2+=lda;
                  L[20]=*A1=Wmx[20]; *A2=Wr0[20]; A1+=lda; A2+=lda;
                  L[21]=*A1=Wmx[21]; *A2=Wr0[21]; A1+=lda; A2+=lda;
                  L[22]=*A1=Wmx[22]; *A2=Wr0[22]; A1+=lda; A2+=lda;
                  L[23]=*A1=Wmx[23]; *A2=Wr0[23]; A1+=lda; A2+=lda;
                  L[24]=*A1=Wmx[24]; *A2=Wr0[24]; A1+=lda; A2+=lda;
                  L[25]=*A1=Wmx[25]; *A2=Wr0[25]; A1+=lda; A2+=lda;
                  L[26]=*A1=Wmx[26]; *A2=Wr0[26]; A1+=lda; A2+=lda;
                  L[27]=*A1=Wmx[27]; *A2=Wr0[27]; A1+=lda; A2+=lda;
                  L[28]=*A1=Wmx[28]; *A2=Wr0[28]; A1+=lda; A2+=lda;
                  L[29]=*A1=Wmx[29]; *A2=Wr0[29]; A1+=lda; A2+=lda;
                  L[30]=*A1=Wmx[30]; *A2=Wr0[30]; A1+=lda; A2+=lda;
                  L[31]=*A1=Wmx[31]; *A2=Wr0[31]; A1+=lda; A2+=lda;
#endif
               }

/* clean-up loop for the nr columns not covered by the unrolled loop;
 * L / Wmx / Wr0 already point past the nu columns handled above. */
               for( i = 0; i < nr; i++, A1 += lda, A2 += lda )
               { L[i] = *A1 = Wmx[i]; *A2 = Wr0[i]; }
            }
            else
            {
/*
 * otherwise the current row of  A  is swapped with itself, so just copy
 * the current row of A into L1. Only entry (II,JJ) of A is rewritten
 * (with gmax). The stores within one unrolled pass are independent, so
 * their interleaved order below has no effect on the result.
 */
               *Mptr( PANEL->A, II, JJ, lda ) = gmax;

               for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                    Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
               {
                  L[ 0]=Wmx[ 0];
#if ( HPL_LOCSWP_DEPTH > 1 )
                  L[ 1]=Wmx[ 1];
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
                  L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
                  L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
                  L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
                  L[ 8]=Wmx[ 8]; L[12]=Wmx[12];
                  L[ 9]=Wmx[ 9]; L[13]=Wmx[13];
                  L[10]=Wmx[10]; L[14]=Wmx[14];
                  L[11]=Wmx[11]; L[15]=Wmx[15];
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
                  L[16]=Wmx[16]; L[20]=Wmx[20];
                  L[17]=Wmx[17]; L[21]=Wmx[21];
                  L[18]=Wmx[18]; L[22]=Wmx[22];
                  L[19]=Wmx[19]; L[23]=Wmx[23];
                  L[24]=Wmx[24]; L[28]=Wmx[28];
                  L[25]=Wmx[25]; L[29]=Wmx[29];
                  L[26]=Wmx[26]; L[30]=Wmx[30];
                  L[27]=Wmx[27]; L[31]=Wmx[31];
#endif
               }

/* remainder columns */
               for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
            }
         }
         else
         {
/*
 * otherwise, the row to be swapped with the current row of A is in Wmx,
 * so copy Wmx into L1 and A.
 */
            A1 = Mptr( PANEL->A, II, 0, lda );

            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                 Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
            {
               L[ 0]=*A1=Wmx[ 0]; A1+=lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
               L[ 1]=*A1=Wmx[ 1]; A1+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
               L[ 2]=*A1=Wmx[ 2]; A1+=lda;
               L[ 3]=*A1=Wmx[ 3]; A1+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
               L[ 4]=*A1=Wmx[ 4]; A1+=lda;
               L[ 5]=*A1=Wmx[ 5]; A1+=lda;
               L[ 6]=*A1=Wmx[ 6]; A1+=lda;
               L[ 7]=*A1=Wmx[ 7]; A1+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
               L[ 8]=*A1=Wmx[ 8]; A1+=lda;
               L[ 9]=*A1=Wmx[ 9]; A1+=lda;
               L[10]=*A1=Wmx[10]; A1+=lda;
               L[11]=*A1=Wmx[11]; A1+=lda;
               L[12]=*A1=Wmx[12]; A1+=lda;
               L[13]=*A1=Wmx[13]; A1+=lda;
               L[14]=*A1=Wmx[14]; A1+=lda;
               L[15]=*A1=Wmx[15]; A1+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
               L[16]=*A1=Wmx[16]; A1+=lda;
               L[17]=*A1=Wmx[17]; A1+=lda;
               L[18]=*A1=Wmx[18]; A1+=lda;
               L[19]=*A1=Wmx[19]; A1+=lda;
               L[20]=*A1=Wmx[20]; A1+=lda;
               L[21]=*A1=Wmx[21]; A1+=lda;
               L[22]=*A1=Wmx[22]; A1+=lda;
               L[23]=*A1=Wmx[23]; A1+=lda;
               L[24]=*A1=Wmx[24]; A1+=lda;
               L[25]=*A1=Wmx[25]; A1+=lda;
               L[26]=*A1=Wmx[26]; A1+=lda;
               L[27]=*A1=Wmx[27]; A1+=lda;
               L[28]=*A1=Wmx[28]; A1+=lda;
               L[29]=*A1=Wmx[29]; A1+=lda;
               L[30]=*A1=Wmx[30]; A1+=lda;
               L[31]=*A1=Wmx[31]; A1+=lda;
#endif
            }

/* remainder columns */
            for( i = 0; i < nr; i++, A1 += lda ) { L[i]=*A1=Wmx[i]; }
         }
      }
      else
      {
/*
 * otherwise I do not own the current row of A, so copy the max row  Wmx
 * into L1.
 */
         for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
              Wmx += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
         {
            L[ 0]=Wmx[ 0];
#if ( HPL_LOCSWP_DEPTH > 1 )
            L[ 1]=Wmx[ 1];
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
            L[ 2]=Wmx[ 2]; L[ 3]=Wmx[ 3];
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
            L[ 4]=Wmx[ 4]; L[ 5]=Wmx[ 5];
            L[ 6]=Wmx[ 6]; L[ 7]=Wmx[ 7];
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
            L[ 8]=Wmx[ 8]; L[ 9]=Wmx[ 9];
            L[10]=Wmx[10]; L[11]=Wmx[11];
            L[12]=Wmx[12]; L[13]=Wmx[13];
            L[14]=Wmx[14]; L[15]=Wmx[15];
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
            L[16]=Wmx[16]; L[17]=Wmx[17];
            L[18]=Wmx[18]; L[19]=Wmx[19];
            L[20]=Wmx[20]; L[21]=Wmx[21];
            L[22]=Wmx[22]; L[23]=Wmx[23];
            L[24]=Wmx[24]; L[25]=Wmx[25];
            L[26]=Wmx[26]; L[27]=Wmx[27];
            L[28]=Wmx[28]; L[29]=Wmx[29];
            L[30]=Wmx[30]; L[31]=Wmx[31];
#endif
         }

/* remainder columns */
         for( i = 0; i < nr; i++ ) { L[i] = Wmx[i]; }
/*
 * and if I own the max row, overwrite it with the current row Wr0.
 */
         if( myrow == (int)(WORK[3]) )
         {
/* WORK[1] is the local index of the max row relative to II. */
            A2 = Mptr( PANEL->A, II + (size_t)(WORK[1]), 0, lda );

            for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
                 Wr0 += HPL_LOCSWP_DEPTH )
            {
               *A2 = Wr0[ 0]; A2+=lda;
#if ( HPL_LOCSWP_DEPTH > 1 )
               *A2 = Wr0[ 1]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
               *A2 = Wr0[ 2]; A2+=lda;
               *A2 = Wr0[ 3]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
               *A2 = Wr0[ 4]; A2+=lda;
               *A2 = Wr0[ 5]; A2+=lda;
               *A2 = Wr0[ 6]; A2+=lda;
               *A2 = Wr0[ 7]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
               *A2 = Wr0[ 8]; A2+=lda;
               *A2 = Wr0[ 9]; A2+=lda;
               *A2 = Wr0[10]; A2+=lda;
               *A2 = Wr0[11]; A2+=lda;
               *A2 = Wr0[12]; A2+=lda;
               *A2 = Wr0[13]; A2+=lda;
               *A2 = Wr0[14]; A2+=lda;
               *A2 = Wr0[15]; A2+=lda;
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
               *A2 = Wr0[16]; A2+=lda;
               *A2 = Wr0[17]; A2+=lda;
               *A2 = Wr0[18]; A2+=lda;
               *A2 = Wr0[19]; A2+=lda;
               *A2 = Wr0[20]; A2+=lda;
               *A2 = Wr0[21]; A2+=lda;
               *A2 = Wr0[22]; A2+=lda;
               *A2 = Wr0[23]; A2+=lda;
               *A2 = Wr0[24]; A2+=lda;
               *A2 = Wr0[25]; A2+=lda;
               *A2 = Wr0[26]; A2+=lda;
               *A2 = Wr0[27]; A2+=lda;
               *A2 = Wr0[28]; A2+=lda;
               *A2 = Wr0[29]; A2+=lda;
               *A2 = Wr0[30]; A2+=lda;
               *A2 = Wr0[31]; A2+=lda;
#endif
            }

/* remainder columns */
            for( i = 0; i < nr; i++, A2 += lda ) { *A2 = Wr0[i]; }
         }
      }
   }
   else
   {
/*
 * Otherwise the max element in the current column is zero,  simply copy
 * the current row Wr0 into L1. The matrix is singular. The stores
 * within one unrolled pass are independent, so their interleaved order
 * below has no effect on the result.
 */
      for( i = 0; i < nu; i += HPL_LOCSWP_DEPTH,
           Wr0 += HPL_LOCSWP_DEPTH, L += HPL_LOCSWP_DEPTH )
      {
         L[ 0]=Wr0[ 0];
#if ( HPL_LOCSWP_DEPTH > 1 )
         L[ 1]=Wr0[ 1];
#endif
#if ( HPL_LOCSWP_DEPTH > 2 )
         L[ 2]=Wr0[ 2]; L[ 3]=Wr0[ 3];
#endif
#if ( HPL_LOCSWP_DEPTH > 4 )
         L[ 4]=Wr0[ 4]; L[ 5]=Wr0[ 5];
         L[ 6]=Wr0[ 6]; L[ 7]=Wr0[ 7];
#endif
#if ( HPL_LOCSWP_DEPTH > 8 )
         L[ 8]=Wr0[ 8]; L[12]=Wr0[12];
         L[ 9]=Wr0[ 9]; L[13]=Wr0[13];
         L[10]=Wr0[10]; L[14]=Wr0[14];
         L[11]=Wr0[11]; L[15]=Wr0[15];
#endif
#if ( HPL_LOCSWP_DEPTH > 16 )
         L[16]=Wr0[16]; L[20]=Wr0[20];
         L[17]=Wr0[17]; L[21]=Wr0[21];
         L[18]=Wr0[18]; L[22]=Wr0[22];
         L[19]=Wr0[19]; L[23]=Wr0[23];
         L[24]=Wr0[24]; L[28]=Wr0[28];
         L[25]=Wr0[25]; L[29]=Wr0[29];
         L[26]=Wr0[26]; L[30]=Wr0[30];
         L[27]=Wr0[27]; L[31]=Wr0[31];
#endif
      }

/* remainder columns */
      for( i = 0; i < nr; i++ ) { L[i] = Wr0[i]; }
/*
 * Set INFO: record the 1-based position PANEL->ia + JJ + 1 of the zero
 * pivot, but only if no earlier one has been recorded (DINFO still 0).
 */
      if( *(PANEL->DINFO) == 0.0 )
         *(PANEL->DINFO) = (double)(PANEL->ia + JJ + 1);
   }
/*
 * End of HPL_dlocswpT
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_pdfact.c0000644000000000000000000001461611256503657014122 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P. Petitet
 * University of Tennessee, Knoxville
 * Innovative Computing Laboratory
 * (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdfact ( HPL_T_panel * PANEL ) #else void HPL_pdfact ( PANEL ) HPL_T_panel * PANEL; #endif { /* * Purpose * ======= * * HPL_pdfact recursively factorizes a 1-dimensional panel of columns. * The RPFACT function pointer specifies the recursive algorithm to be * used, either Crout, Left- or Right looking. NBMIN allows to vary the * recursive stopping criterium in terms of the number of columns in the * panel, and NDIV allow to specify the number of subpanels each panel * should be divided into. Usuallly a value of 2 will be chosen. 
Finally * PFACT is a function pointer specifying the non-recursive algorithm to * to be used on at most NBMIN columns. One can also choose here between * Crout, Left- or Right looking. Empirical tests seem to indicate that * values of 4 or 8 for NBMIN give the best results. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ void * vptr = NULL; int align, jb; /* .. * .. Executable Statements .. 
*/ jb = PANEL->jb; PANEL->n -= jb; PANEL->ja += jb; if( ( PANEL->grid->mycol != PANEL->pcol ) || ( jb <= 0 ) ) return; #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_RPFACT ); #endif align = PANEL->algo->align; vptr = (void *)malloc( ( (size_t)(align) + (size_t)(((4+((unsigned int)(jb) << 1)) << 1) )) * sizeof(double) ); if( vptr == NULL ) { HPL_pabort( __LINE__, "HPL_pdfact", "Memory allocation failed" ); } /* * Factor the panel - Update the panel pointers */ PANEL->algo->rffun( PANEL, PANEL->mp, jb, 0, (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ) ); if( vptr ) free( vptr ); PANEL->A = Mptr( PANEL->A, 0, jb, PANEL->lda ); PANEL->nq -= jb; PANEL->jj += jb; #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_RPFACT ); #endif /* * End of HPL_pdfact */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdmxswp.c0000644000000000000000000003050511256503657014356 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdmxswp
(
   HPL_T_panel *                    PANEL,
   const int                        M,
   const int                        II,
   const int                        JJ,
   double *                         WORK
)
#else
void HPL_pdmxswp
( PANEL, M, II, JJ, WORK )
   HPL_T_panel *                    PANEL;
   const int                        M;
   const int                        II;
   const int                        JJ;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdmxswp swaps and broadcasts the absolute value max row using
 * bi-directional exchange.  The buffer is partially set by HPL_dlocmax.
 *
 * Bi-directional exchange is used to perform the swap::broadcast
 * operations at once for one column in the panel.  This results in a
 * lower number of slightly larger messages than usual.  On P processes
 * and assuming bi-directional links,  the running time of this function
 * can be approximated by
 *
 *    log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth )
 *
 * where  lat and bdwth are the latency and bandwidth of the network for
 * double precision real elements.  Communication  only  occurs  in  one
 * process  column. Mono-directional links  will cause the communication
 * cost to double.
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information. On exit, PANEL->DPIV[JJ] holds WORK[2],
 *         the global pivot index.
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the local number of rows of the matrix
 *         column on which this function operates.
 *
 * II      (local input)                 const int
 *         On entry, II  specifies the row offset where the column to be
 *         operated on starts with respect to the panel.
 *
 * JJ      (local input)                 const int
 *         On entry, JJ  specifies the column offset where the column to
 *         be operated on starts with respect to the panel.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
 *         It  is assumed that  HPL_dlocmax  was called  prior  to  this
 *         routine to  initialize  the first four entries of this array.
 *         On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
 *         Note that this is also the  JJth  row  (or column) of L1. The
 *         remaining part is used as a temporary array.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double                     gmax, tmp1;
   double                     * A0, * Wmx, * Wwork;
   HPL_T_grid                 * grid;
   MPI_Comm                   comm;
   unsigned int               hdim, ip2, ip2_, ipow, k, mask;
   int                        Np2, cnt_, cnt0, i, icurrow, lda, mydist,
                              mydis_, myrow, n0, nprow, partner, rcnt,
                              root, scnt, size_;
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_MXSWP );
#endif
   grid = PANEL->grid; myrow = grid->myrow; nprow = grid->nprow;
/*
 * ip2   : read from grid->row_ip2; presumably the largest power of two
 *         less than or equal to nprow, which is what the [0..ip2) and
 *         [ip2..nprow) split used below relies on - confirm against the
 *         grid initialisation;
 * hdim  : dimension of the hypercube made of those ip2 processes;
 * Np2   : logical flag, nonzero when nprow - ip2 (kept in size_) is not
 *         zero, i.e. when nprow is not a power of 2;
 * mydist: this process row's distance from icurrow modulo nprow; all
 *         process coordinates below are expressed relative to icurrow.
 */
   comm    = grid->col_comm; ip2 = (unsigned int)(grid->row_ip2);
   hdim    = (unsigned int)(grid->row_hdim); n0 = PANEL->jb;
   icurrow = PANEL->prow;
   Np2     = (int)( ( size_ = nprow - ip2 ) != 0 );
   mydist  = MModSub( myrow, icurrow, nprow );
/*
 * Set up pointers in workspace:  WORK and Wwork  point to the beginning
 * of the buffers of size 4 + 2*N0 to be combined. Wmx points to the row
 * owning the local (before combine) and global (after combine) absolute
 * value max. A0 points to the copy of the current row of the matrix.
 *
 * Layout of one buffer: [0:3] header, [4:4+N0-1] Wmx, [4+N0:4+2*N0-1] A0.
 * cnt_ = 4 + N0 is the length of header + Wmx, cnt0 = 4 + 2*N0 the full
 * length including A0.
 */
   cnt0  = ( cnt_ = n0 + 4 ) + n0; A0 = ( Wmx = WORK + 4 ) + n0;
   Wwork = WORK + cnt0;
/*
 * Wmx[0:N0-1] := A[ilindx,0:N0-1] where ilindx is  (int)(WORK[1])  (row
 * with max in current column). If I am the current process row, pack in
 * addition the current row of A in A0[0:N0-1].  If I do not own any row
 * of A, then zero out Wmx[0:N0-1].
 */
   if( M > 0 )
   {
      lda = PANEL->lda;

      HPL_dcopy( n0, Mptr( PANEL->A, II+(int)(WORK[1]), 0, lda ), lda,
                 Wmx, 1 );
      if( myrow == icurrow )
      { HPL_dcopy( n0, Mptr( PANEL->A, II, 0, lda ), lda, A0, 1 ); }
   }
   else { for( i = 0; i < n0; i++ ) Wmx[i] = HPL_rzero; }
/*
 * Combine the results (bi-directional exchange):  the process coordina-
 * tes are relative to icurrow,  this makes it possible to reduce the
 * communication volume when nprow is not a power of 2.
 *
 * When nprow is not a power of 2:  proc[i-ip2] receives local data from
 * proc[i]  for all i in [ip2..nprow).  In addition,  proc[0]  (icurrow)
 * sends to proc[ip2] the current row of A  for later broadcast in procs
 * [ip2..nprow).
 *
 * The receiver keeps the incoming candidate when its absolute value is
 * larger, or equal with a smaller WORK[3] entry (NOTE(review): WORK[3]
 * is presumably the owning process row set by HPL_dlocmax - confirm).
 */
   if( ( Np2 != 0 ) &&
       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
   {
      if( ( mydist & ip2 ) != 0 )
      {
         if( mydist == (int)(ip2) )
            (void) HPL_sdrv( WORK, cnt_, MSGID_BEGIN_PFACT, A0, n0,
                             MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
                             nprow ), comm );
         else
            (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
                             nprow ), MSGID_BEGIN_PFACT, comm );
      }
      else
      {
         if( mydist == 0 )
            (void) HPL_sdrv( A0, n0, MSGID_BEGIN_PFACT, Wwork, cnt_,
                             MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
                             nprow ), comm );
         else
            (void) HPL_recv( Wwork, cnt_, MModAdd( partner, icurrow,
                             nprow ), MSGID_BEGIN_PFACT, comm );

         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
         if( ( tmp1 > gmax ) ||
             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
         { HPL_dcopy( cnt_, Wwork, 1, WORK, 1 ); }
      }
   }

   if( mydist < (int)(ip2) )
   {
/*
 * power of 2 part of the processes collection:  processes  [0..ip2) are
 * combining (binary exchange); proc[0] has two rows to send, but one to
 * receive.  At every step  k  in [0..hdim) of the algorithm,  a process
 * pair exchanging 2 rows is such that  mydist >> k+1 is 0.  Among those
 * processes the ones  that are sending one more row than  what they are
 * receiving are such that mydist >> k is equal to 0.
 */
      k = 0; ipow = 1;

      while( k < hdim )
      {
         if( ( (unsigned int)(mydist) >> ( k + 1 ) ) == 0 )
         {
            if( ( (unsigned int)(mydist) >> k ) == 0 )
            { scnt = cnt0; rcnt = cnt_; }
            else
            { scnt = cnt_; rcnt = cnt0; }
         }
         else { scnt = rcnt = cnt_; }

         partner = (int)( (unsigned int)(mydist) ^ ipow );
         (void) HPL_sdrv( WORK, scnt, MSGID_BEGIN_PFACT, Wwork, rcnt,
                          MSGID_BEGIN_PFACT, MModAdd( partner, icurrow,
                          nprow ), comm );
/*
 * Keep the better candidate. When the partner also shipped the current
 * row (rcnt == cnt0) that row must be retained in A0 even if the
 * partner's candidate loses, hence the separate copy in the else branch.
 */
         tmp1 = Mabs( Wwork[0] ); gmax = Mabs( WORK[0] );
         if( ( tmp1 > gmax ) ||
             ( ( tmp1 == gmax ) && ( Wwork[3] < WORK[3] ) ) )
         {
            HPL_dcopy( ( rcnt == cnt0 ? cnt0 : cnt_ ), Wwork, 1,
                       WORK, 1 );
         }
         else if( rcnt == cnt0 )
         {
            HPL_dcopy( n0, Wwork+cnt_, 1, A0, 1 );
         }

         ipow <<= 1; k++;
      }
   }
   else if( size_ > 1 )
   {
/*
 * proc[ip2] broadcast current row of A to procs [ip2+1..nprow).
 *
 * The loop below first derives ip2_ (a power of two) and mask (ip2_*2-1)
 * from size_ - 1, then walks the bits from high to low: at each step a
 * process whose lower bits (mydis_ & mask) are all zero either receives
 * (its ip2_ bit is set) or sends to mydis_ ^ ip2_ when that partner lies
 * inside [0..size_). Distances mydis_ are relative to root = proc[ip2].
 */
      k = (unsigned int)(size_) - 1; ip2_ = mask = 1;
      while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }

      root   = MModAdd( icurrow, (int)(ip2), nprow );
      mydis_ = MModSub( myrow,  root,       nprow );

      do
      {
         mask ^= ip2_;
         if( ( mydis_ & mask ) == 0 )
         {
            partner = (int)(mydis_ ^ ip2_);
            if( ( mydis_ & ip2_ ) != 0 )
            {
               (void) HPL_recv( A0, n0, MModAdd( root, partner,
                                nprow ), MSGID_BEGIN_PFACT, comm );
            }
            else if( partner < size_ )
            {
               (void) HPL_send( A0, n0, MModAdd( root, partner,
                                nprow ), MSGID_BEGIN_PFACT, comm );
            }
         }
         ip2_ >>= 1;
      } while( ip2_ > 0 );
   }
/*
 * If nprow is not a power of 2,  for all i in [ip2..nprow), proc[i-ip2]
 * sends the pivot row to proc[i]  along  with the first four entries of
 * the WORK array.
 */
   if( ( Np2 != 0 ) &&
       ( ( partner = (int)((unsigned int)(mydist) ^ ip2 ) ) < nprow ) )
   {
      if( ( mydist & ip2 ) != 0 )
      {
         (void) HPL_recv( WORK, cnt_, MModAdd( partner, icurrow,
                          nprow ), MSGID_BEGIN_PFACT, comm );
      }
      else
      {
         (void) HPL_send( WORK, cnt_, MModAdd( partner, icurrow,
                          nprow ), MSGID_BEGIN_PFACT, comm );
      }
   }
/*
 * Save the global pivot index in pivot array
 */
   (PANEL->DPIV)[JJ] = WORK[2];

#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_MXSWP );
#endif
/*
 * End of HPL_pdmxswp
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_pdpancrN.c0000644000000000000000000002520311256503657014420 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P. Petitet
 * University of Tennessee, Knoxville
 * Innovative Computing Laboratory
 * (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdpancrN
(
   HPL_T_panel *                    PANEL,
   const int                        M,
   const int                        N,
   const int                        ICOFF,
   double *                         WORK
)
#else
void HPL_pdpancrN
( PANEL, M, N, ICOFF, WORK )
   HPL_T_panel *                    PANEL;
   const int                        M;
   const int                        N;
   const int                        ICOFF;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdpancrN factorizes  a panel of columns that is a sub-array of a
 * larger one-dimensional panel A  using the Crout variant of the usual
 * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
 * of the panel is stored in no-transpose form (i.e. just like the input
 * matrix itself).
 *
 * Bi-directional exchange is used to perform the swap::broadcast
 * operations at once for one column in the panel.  This  results in a
 * lower number of slightly larger  messages than usual.  On P processes
 * and assuming bi-directional links,  the running time of this function
 * can be approximated by (when N is equal to N0):
 *
 *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
 *    N0^2 * ( M - N0/3 ) * gam2-3
 *
 * where M is the local number of rows of  the panel, lat and bdwth are
 * the latency and bandwidth of the network for  double  precision  real
 * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
 * rate of execution. The recursive algorithm makes it possible to almost
 * achieve Level 3 BLAS performance in the panel factorization. On a
 * large number of modern machines this operation is however latency
 * bound, meaning that its cost can be estimated by only the latency
 * portion N0 * log_2(P) * lat. Mono-directional links will double this
 * communication cost.
 *
 * Note that one iteration of the main loop is unrolled. The local
 * computation of the absolute value max of the next column is performed
 * just after its update by the current column. This makes it possible
 * to bring the current column only once through cache at each step. The
 * current implementation does not perform any blocking for this sequence
 * of BLAS operations, however the design allows for plugging in an
 * optimal (machine-specific) specialized BLAS-like kernel. This idea has
 * been suggested to us by Fred Gustavson, IBM T.J. Watson Research
 * Center.
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information.
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the local number of rows of sub(A).
 *
 * N       (local input)                 const int
 *         On entry,  N specifies the local number of columns of sub(A).
 *
 * ICOFF   (global input)                const int
 *         On entry, ICOFF specifies the row and column offset of sub(A)
 *         in A.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double                     * A, * L1, * L1ptr;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
#endif
   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
                              m=M, n0;
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
   A    = PANEL->A;   lda = PANEL->lda;
   L1   = PANEL->L1;  n0  = PANEL->jb;
/*
 * curr is nonzero when this process row is PANEL->prow. In that case the
 * local rows start at ICOFF and the row at ii is excluded from the
 * sub-diagonal work (iip1 = ii+1, Mm1 = m-1); otherwise all m local rows
 * starting at 0 take part.
 */
   curr = (int)( PANEL->grid->myrow == PANEL->prow );

   Nm1  = N - 1; jj = ICOFF;
   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
/*
 * Create the matrix views
 */
   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
#endif
/*
 * Find local absolute value max in first column - initialize WORK[0:3]
 */
   HPL_dlocmax( PANEL, m, ii, jj, WORK );

/*
 * Main loop over the first N-1 columns: kk counts the columns already
 * processed, Nm1 the columns remaining to the right of column jj.
 */
   while( Nm1 > 0 )
   {
/*
 * Swap and broadcast the current row
 */
      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
      HPL_dlocswpN( PANEL,    ii, jj, WORK );
/*
 * Compute row (column) jj of L1: the trailing Nm1 entries of row jj are
 * updated with the kk previously computed columns, then copied back
 * into row ii of A on the current process row.
 */
      if( kk > 0 )
      {
         L1ptr = Mptr( L1, jj, jj+1, n0 );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Av1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, Nm1 );
         Xv1 = vsip_msubview_d( Xv0, jj, ICOFF, 1, kk );
         Yv1 = vsip_msubview_d( Xv0, jj, jj+1, 1, Nm1 );

         vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Av1, VSIP_MAT_NTRANS,
                      HPL_rone, Yv1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Yv1 );
         (void) vsip_mdestroy_d( Xv1 );
         (void) vsip_mdestroy_d( Av1 );
#else
         HPL_dgemv( HplColumnMajor, HplTrans, kk, Nm1, -HPL_rone,
                    Mptr( L1, ICOFF, jj+1, n0 ), n0, Mptr( L1, jj,
                    ICOFF, n0 ), n0, HPL_rone, L1ptr, n0 );
#endif
         if( curr != 0 )
            HPL_dcopy( Nm1, L1ptr, n0, Mptr( A, ii, jj+1, lda ), lda );
      }
/*
 * Scale  current column by its absolute value max entry  -  Update
 * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1)
 * and find local absolute value max in  that column (Only one  pass
 * through cache for each current column).  This sequence of operations
 * could benefit from a specialized blocked implementation.
 *
 * The scaling is skipped when the pivot WORK[0] is exactly zero.
 */
      if( WORK[0] != HPL_rzero )
         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
      Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk+1, 1 );
      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 );

      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
                   HPL_rone, Yv1 );
/*
 * Destroy the matrix subviews
 */
      vsip_mdestroy_d( Yv1 );
      vsip_mdestroy_d( Xv1 );
      vsip_mdestroy_d( Av1 );
#else
      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, ICOFF,
                 jj+1, n0 ), 1, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
#endif
      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
/*
 * On the current process row the active row window shrinks by one.
 */
      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }

      Nm1--; jj++; kk++;
   }
/*
 * Swap and broadcast last row - Scale last column by its absolute value
 * max entry
 */
   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
   HPL_dlocswpN( PANEL,    ii, jj, WORK );
   if( WORK[0] != HPL_rzero )
      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );

#ifdef HPL_CALL_VSIPL
/*
 * Release the blocks
 */
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
   (void) vsip_mdestroy_d( Xv0 );
   (void) vsip_mdestroy_d( Av0 );
#endif
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
/*
 * End of HPL_pdpancrN
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_pdpancrT.c0000644000000000000000000002512711256503657014433 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdpancrT
(
   HPL_T_panel *                    PANEL,
   const int                        M,
   const int                        N,
   const int                        ICOFF,
   double *                         WORK
)
#else
void HPL_pdpancrT
( PANEL, M, N, ICOFF, WORK )
   HPL_T_panel *                    PANEL;
   const int                        M;
   const int                        N;
   const int                        ICOFF;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdpancrT factorizes  a panel of columns that is a sub-array of a
 * larger one-dimensional panel A  using the Crout variant of the usual
 * one-dimensional algorithm.  The lower triangular N0-by-N0 upper block
 * of the panel is stored in transpose form.
 *
 * Bi-directional exchange is used to perform the swap::broadcast
 * operations at once for one column in the panel.  This  results in a
 * lower number of slightly larger  messages than usual.  On P processes
 * and assuming bi-directional links,  the running time of this function
 * can be approximated by (when N is equal to N0):
 *
 *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
 *    N0^2 * ( M - N0/3 ) * gam2-3
 *
 * where M is the local number of rows of  the panel, lat and bdwth are
 * the latency and bandwidth of the network for  double  precision  real
 * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
 * rate of execution. The recursive algorithm makes it possible to almost
 * achieve Level 3 BLAS performance in the panel factorization. On a
 * large number of modern machines this operation is however latency
 * bound, meaning that its cost can be estimated by only the latency
 * portion N0 * log_2(P) * lat. Mono-directional links will double this
 * communication cost.
 *
 * Note that one iteration of the main loop is unrolled. The local
 * computation of the absolute value max of the next column is performed
 * just after its update by the current column. This makes it possible
 * to bring the current column only once through cache at each step. The
 * current implementation does not perform any blocking for this sequence
 * of BLAS operations, however the design allows for plugging in an
 * optimal (machine-specific) specialized BLAS-like kernel. This idea has
 * been suggested to us by Fred Gustavson, IBM T.J. Watson Research
 * Center.
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information.
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the local number of rows of sub(A).
 *
 * N       (local input)                 const int
 *         On entry,  N specifies the local number of columns of sub(A).
 *
 * ICOFF   (global input)                const int
 *         On entry, ICOFF specifies the row and column offset of sub(A)
 *         in A.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double                     * A, * L1, * L1ptr;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
#endif
   int                        Mm1, Nm1, curr, ii, iip1, jj, kk=0, lda,
                              m=M, n0;
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
   A    = PANEL->A;   lda = PANEL->lda;
   L1   = PANEL->L1;  n0  = PANEL->jb;
/*
 * curr is nonzero when this process row is PANEL->prow. In that case the
 * local rows start at ICOFF and the row at ii is excluded from the
 * sub-diagonal work (iip1 = ii+1, Mm1 = m-1); otherwise all m local rows
 * starting at 0 take part.
 */
   curr = (int)( PANEL->grid->myrow == PANEL->prow );

   Nm1  = N - 1; jj = ICOFF;
   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
/*
 * Create the matrix views
 */
   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
#endif
/*
 * Find local absolute value max in first column - initialize WORK[0:3]
 */
   HPL_dlocmax( PANEL, m, ii, jj, WORK );

/*
 * Main loop over the first N-1 columns: kk counts the columns already
 * processed, Nm1 the columns remaining to the right of column jj.
 */
   while( Nm1 > 0 )
   {
/*
 * Swap and broadcast the current row
 */
      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
      HPL_dlocswpT( PANEL,    ii, jj, WORK );
/*
 * Compute row (column) jj of L1. L1 is held transposed here, so the
 * Nm1 entries being updated are contiguous (unit stride) in column jj
 * starting at row jj+1; they are then copied into row ii of A on the
 * current process row.
 */
      if( kk > 0 )
      {
         L1ptr = Mptr( L1, jj+1, jj, n0 );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Av1 = vsip_msubview_d( Xv0, jj+1, ICOFF, Nm1, kk );
         Xv1 = vsip_msubview_d( Xv0, ICOFF, jj, kk, 1 );
         Yv1 = vsip_msubview_d( Xv0, jj+1, jj, Nm1, 1 );

         vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
                      HPL_rone, Yv1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Yv1 );
         (void) vsip_mdestroy_d( Xv1 );
         (void) vsip_mdestroy_d( Av1 );
#else
         HPL_dgemv( HplColumnMajor, HplNoTrans, Nm1, kk, -HPL_rone,
                    Mptr( L1, jj+1, ICOFF, n0 ), n0, Mptr( L1, ICOFF,
                    jj, n0 ), 1, HPL_rone, L1ptr, 1 );
#endif
         if( curr != 0 )
            HPL_dcopy( Nm1, L1ptr, 1, Mptr( A, ii, jj+1, lda ), lda );
      }
/*
 * Scale  current column by its absolute value max entry  -  Update
 * diagonal and subdiagonal elements in column A(iip1:iip1+Mm1-1, jj+1)
 * and find local absolute value max in  that column (Only one  pass
 * through cache for each current column).  This sequence of operations
 * could benefit from a specialized blocked implementation.
 *
 * The scaling is skipped when the pivot WORK[0] is exactly zero.
 */
      if( WORK[0] != HPL_rzero )
         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk+1 );
      Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk+1 );
      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 );

      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS,
                   HPL_rone, Yv1 );
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Yv1 );
      (void) vsip_mdestroy_d( Xv1 );
      (void) vsip_mdestroy_d( Av1 );
#else
      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk+1, -HPL_rone,
                 Mptr( A, iip1, ICOFF, lda ), lda, Mptr( L1, jj+1,
                 ICOFF, n0 ), n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
#endif
      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
/*
 * On the current process row the active row window shrinks by one.
 */
      if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; }

      Nm1--; jj++; kk++;
   }
/*
 * Swap and broadcast last row - Scale last column by its absolute value
 * max entry
 */
   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
   HPL_dlocswpT( PANEL,    ii, jj, WORK );
   if( WORK[0] != HPL_rzero )
      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );

#ifdef HPL_CALL_VSIPL
/*
 * Release the blocks
 */
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
   (void) vsip_mdestroy_d( Xv0 );
   (void) vsip_mdestroy_d( Av0 );
#endif
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
/*
 * End of HPL_pdpancrT
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_pdpanllN.c0000644000000000000000000002361311256503657014426 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdpanllN
(
   HPL_T_panel *                    PANEL,
   const int                        M,
   const int                        N,
   const int                        ICOFF,
   double *                         WORK
)
#else
void HPL_pdpanllN
( PANEL, M, N, ICOFF, WORK )
   HPL_T_panel *                    PANEL;
   const int                        M;
   const int                        N;
   const int                        ICOFF;
   double *                         WORK;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdpanllN factorizes  a panel of columns that is a sub-array of a
 * larger one-dimensional panel A using the Left-looking variant of the
 * usual one-dimensional algorithm.  The lower triangular N0-by-N0 upper
 * block of the panel is stored in no-transpose form (i.e. just like the
 * input matrix itself).
 *
 * Bi-directional exchange is used to perform the swap::broadcast
 * operations at once for one column in the panel.  This  results in a
 * lower number of slightly larger  messages than usual.  On P processes
 * and assuming bi-directional links,  the running time of this function
 * can be approximated by (when N is equal to N0):
 *
 *    N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) +
 *    N0^2 * ( M - N0/3 ) * gam2-3
 *
 * where M is the local number of rows of  the panel, lat and bdwth are
 * the latency and bandwidth of the network for  double  precision  real
 * words, and  gam2-3  is  an estimate of the  Level 2 and Level 3  BLAS
 * rate of execution. The recursive algorithm makes it possible to almost
 * achieve Level 3 BLAS performance in the panel factorization. On a
 * large number of modern machines this operation is however latency
 * bound, meaning that its cost can be estimated by only the latency
 * portion N0 * log_2(P) * lat. Mono-directional links will double this
 * communication cost.
 *
 * Note that one iteration of the main loop is unrolled. The local
 * computation of the absolute value max of the next column is performed
 * just after its update by the current column. This makes it possible
 * to bring the current column only once through cache at each step. The
 * current implementation does not perform any blocking for this sequence
 * of BLAS operations, however the design allows for plugging in an
 * optimal (machine-specific) specialized BLAS-like kernel. This idea has
 * been suggested to us by Fred Gustavson, IBM T.J. Watson Research
 * Center.
 *
 * Arguments
 * =========
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel information.
 *
 * M       (local input)                 const int
 *         On entry,  M specifies the local number of rows of sub(A).
 *
 * N       (local input)                 const int
 *         On entry,  N specifies the local number of columns of sub(A).
 *
 * ICOFF   (global input)                const int
 *         On entry, ICOFF specifies the row and column offset of sub(A)
 *         in A.
 *
 * WORK    (local workspace)             double *
 *         On entry, WORK  is a workarray of size at least 2*(4+2*N0).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double                     * A, * L1, * L1ptr;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d               * Av0, * Av1, * Yv1, * Xv0, * Xv1;
#endif
/*
 * kk is assigned at the top of every loop iteration before it is read;
 * it is not used outside the loop.
 */
   int                        Mm1, Nm1, curr, ii, iip1, jj, kk, lda,
                              m=M, n0;
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
   A    = PANEL->A;   lda = PANEL->lda;
   L1   = PANEL->L1;  n0  = PANEL->jb;
/*
 * curr is nonzero when this process row is PANEL->prow. In that case the
 * local rows start at ICOFF and the row at ii is excluded from the
 * sub-diagonal work (iip1 = ii+1, Mm1 = m-1); otherwise all m local rows
 * starting at 0 take part.
 */
   curr = (int)( PANEL->grid->myrow == PANEL->prow );

   Nm1  = N - 1; jj = ICOFF;
   if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; }
   else            { ii = 0;     iip1 = ii;   Mm1 = m;   }
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
   (void) vsip_blockadmit_d(  PANEL->Ablock,  VSIP_TRUE );
   (void) vsip_blockadmit_d(  PANEL->L1block, VSIP_TRUE );
/*
 * Create the matrix views
 */
   Av0 = vsip_mbind_d( PANEL->Ablock,  0, 1, lda,       lda, PANEL->pmat->nq );
   Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb );
#endif
/*
 * Find local absolute value max in first column and initialize WORK[0:3]
 */
   HPL_dlocmax( PANEL, m, ii, jj, WORK );

   while( Nm1 > 0 )
   {
/*
 * Swap and broadcast the current row
 */
      HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
      HPL_dlocswpN( PANEL,    ii, jj, WORK );
/*
 * Left-looking step: solve the kk-by-kk unit lower triangular system
 * held in L1(ICOFF:,ICOFF:) in place against the leading kk entries of
 * column jj+1 of L1, where kk = jj + 1 - ICOFF is the number of columns
 * processed so far including the current one.
 */
      L1ptr = Mptr( L1, ICOFF, jj+1, n0 ); kk = jj + 1 - ICOFF;
      HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans, HplUnit, kk,
                 Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, 1 );
/*
 * Scale  current column by its absolute value max entry  -  Update  and
 * find local  absolute value max  in next column (Only one pass through
 * cache for each next column).  This sequence of operations could bene-
 * fit from a specialized blocked implementation.
 *
 * The scaling is skipped when the pivot WORK[0] is exactly zero.
 */
      if( WORK[0] != HPL_rzero )
         HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
      Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk );
      Xv1 = vsip_msubview_d( Xv0, ICOFF, jj+1, kk, 1 );
      Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 );

      vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_NTRANS,
                   HPL_rone, Yv1 );
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Yv1 );
      (void) vsip_mdestroy_d( Xv1 );
      (void) vsip_mdestroy_d( Av1 );
#else
      HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone,
                 Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, 1,
                 HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 );
#endif
      HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK );
/*
 * On the current process row, store the solved kk entries of column
 * jj+1 back into A and shrink the active row window by one.
 */
      if( curr != 0 )
      {
         HPL_dcopy( kk, L1ptr, 1, Mptr( A, ICOFF, jj+1, lda ), 1 );
         ii = iip1; iip1++; m = Mm1; Mm1--;
      }

      Nm1--; jj++;
   }
/*
 * Swap and broadcast last row - Scale last column by its absolute value
 * max entry
 */
   HPL_pdmxswp(  PANEL, m, ii, jj, WORK );
   HPL_dlocswpN( PANEL,    ii, jj, WORK );
   if( WORK[0] != HPL_rzero )
      HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 );

#ifdef HPL_CALL_VSIPL
/*
 * Release the blocks
 */
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE );
   (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
   (void) vsip_mdestroy_d( Xv0 );
   (void) vsip_mdestroy_d( Av0 );
#endif
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_PFACT );
#endif
/*
 * End of HPL_pdpanllN
 */
}
hpcc-1.4.1/hpl/src/pfact/HPL_pdpanllT.c0000644000000000000000000002353411256503657014436 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdpanllT ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdpanllT ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdpanllT factorizes a panel of columns that is a sub-array of a * larger one-dimensional panel A using the Left-looking variant of the * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper * block of the panel is stored in transpose form. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Note that one iteration of the the main loop is unrolled. The local * computation of the absolute value max of the next column is performed * just after its update by the current column. This allows to bring the * current column only once through cache at each step. 
The current * implementation does not perform any blocking for this sequence of * BLAS operations, however the design allows for plugging in an optimal * (machine-specific) specialized BLAS-like kernel. This idea has been * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Av1, * Yv1, * Xv0, * Xv1; #endif int Mm1, Nm1, curr, ii, iip1, jj, kk, lda, m=M, n0; /* .. * .. Executable Statements .. 
*/ #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; curr = (int)( PANEL->grid->myrow == PANEL->prow ); Nm1 = N - 1; jj = ICOFF; if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } else { ii = 0; iip1 = ii; Mm1 = m; } #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Xv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); #endif /* * Find local absolute value max in first column and initialize WORK[0:3] */ HPL_dlocmax( PANEL, m, ii, jj, WORK ); while( Nm1 > 0 ) { /* * Swap and broadcast the current row */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpT( PANEL, ii, jj, WORK ); L1ptr = Mptr( L1, jj+1, ICOFF, n0 ); kk = jj + 1 - ICOFF; HPL_dtrsv( HplColumnMajor, HplUpper, HplTrans, HplUnit, kk, Mptr( L1, ICOFF, ICOFF, n0 ), n0, L1ptr, n0 ); /* * Scale current column by its absolute value max entry - Update and * find local absolute value max in next column (Only one pass through * cache for each next column). This sequence of operations could bene- * fit from a specialized blocked implementation. 
*/ if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); #ifdef HPL_CALL_VSIPL /* * Create the matrix subviews */ Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+ICOFF, Mm1, kk ); Xv1 = vsip_msubview_d( Xv0, jj+1, ICOFF, 1, kk ); Yv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+1, Mm1, 1 ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Xv1, VSIP_MAT_TRANS, HPL_rone, Yv1 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Yv1 ); (void) vsip_mdestroy_d( Xv1 ); (void) vsip_mdestroy_d( Av1 ); #else HPL_dgemv( HplColumnMajor, HplNoTrans, Mm1, kk, -HPL_rone, Mptr( A, iip1, ICOFF, lda ), lda, L1ptr, n0, HPL_rone, Mptr( A, iip1, jj+1, lda ), 1 ); #endif HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); if( curr != 0 ) { HPL_dcopy( kk, L1ptr, n0, Mptr( A, ICOFF, jj+1, lda ), 1 ); ii = iip1; iip1++; m = Mm1; Mm1--; } Nm1--; jj++; } /* * Swap and broadcast last row - Scale last column by its absolute value * max entry */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpT( PANEL, ii, jj, WORK ); if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); #ifdef HPL_CALL_VSIPL /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Xv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Xv0 ); (void) vsip_mdestroy_d( Av0 ); #endif #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif /* * End of HPL_pdpanllT */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdpanrlN.c0000644000000000000000000002376311256503657014442 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdpanrlN ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdpanrlN ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a * larger one-dimensional panel A using the Right-looking variant of the * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper * block of the panel is stored in no-transpose form (i.e. just like the * input matrix itself). * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Note that one iteration of the the main loop is unrolled. The local * computation of the absolute value max of the next column is performed * just after its update by the current column. This allows to bring the * current column only once through cache at each step. 
The current * implementation does not perform any blocking for this sequence of * BLAS operations, however the design allows for plugging in an optimal * (machine-specific) specialized BLAS-like kernel. This idea has been * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Acur, * Anxt; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; #endif int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M; /* .. * .. Executable Statements .. 
*/ #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif A = PANEL->A; lda = PANEL->lda; curr = (int)( PANEL->grid->myrow == PANEL->prow ); Nm1 = N - 1; jj = ICOFF; if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } else { ii = 0; iip1 = ii; Mm1 = m; } #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); #endif /* * Find local absolute value max in first column - initialize WORK[0:3] */ HPL_dlocmax( PANEL, m, ii, jj, WORK ); while( Nm1 >= 1 ) { Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); /* * Swap and broadcast the current row */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpN( PANEL, ii, jj, WORK ); /* * Scale current column by its absolute value max entry - Update trai- * ling sub-matrix and find local absolute value max in next column (On- * ly one pass through cache for each current column). This sequence of * operations could benefit from a specialized blocked implementation. 
*/ if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); HPL_daxpy( Mm1, -WORK[4+jj+1], Acur, 1, Anxt, 1 ); HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); #ifdef HPL_CALL_VSIPL if( Nm1 > 1 ) { /* * Create the matrix subviews */ Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, Mm1, Nm1-1 ); Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, Mm1, 1 ); Yv1 = vsip_msubview_d( Yv0, jj, jj+2, 1, Nm1-1 ); vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_NTRANS, HPL_rone, Av1 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Yv1 ); (void) vsip_mdestroy_d( Xv1 ); (void) vsip_mdestroy_d( Av1 ); } #else if( Nm1 > 1 ) HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, WORK+4+jj+2, 1, Mptr( Anxt, 0, 1, lda ), lda ); #endif /* * Same thing as above but with worse data access on y (A += x * y^T) * * if( Nm1 > 1 ) ) * HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, * Mptr( L1, jj, jj+2, n0 ), n0, Mptr( Anxt, 0, 1, lda ), * lda ); */ if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } Nm1--; jj++; } /* * Swap and broadcast last row - Scale last column by its absolute value * max entry */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpN( PANEL, ii, jj, WORK ); if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); #ifdef HPL_CALL_VSIPL /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Yv0 ); (void) vsip_mdestroy_d( Av0 ); #endif #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif /* * End of HPL_pdpanrlN */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdpanrlT.c0000644000000000000000000002344211256503657014442 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdpanrlT ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdpanrlT ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a * larger one-dimensional panel A using the Right-looking variant of the * usual one-dimensional algorithm. The lower triangular N0-by-N0 upper * block of the panel is stored in transpose form. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Note that one iteration of the the main loop is unrolled. The local * computation of the absolute value max of the next column is performed * just after its update by the current column. This allows to bring the * current column only once through cache at each step. 
The current * implementation does not perform any blocking for this sequence of * BLAS operations, however the design allows for plugging in an optimal * (machine-specific) specialized BLAS-like kernel. This idea has been * suggested to us by Fred Gustavson, IBM T.J. Watson Research Center. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Acur, * Anxt, * L1; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Av1, * Xv1, * Yv0, * Yv1; #endif int Mm1, Nm1, curr, ii, iip1, jj, lda, m=M, n0; /* .. * .. Executable Statements .. 
*/ #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; curr = (int)( PANEL->grid->myrow == PANEL->prow ); Nm1 = N - 1; jj = ICOFF; if( curr != 0 ) { ii = ICOFF; iip1 = ii+1; Mm1 = m-1; } else { ii = 0; iip1 = ii; Mm1 = m; } #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Yv0 = vsip_mbind_d( PANEL->L1block, 0, 1, PANEL->jb, PANEL->jb, PANEL->jb ); #endif /* * Find local absolute value max in first column - initialize WORK[0:3] */ HPL_dlocmax( PANEL, m, ii, jj, WORK ); while( Nm1 >= 1 ) { Acur = Mptr( A, iip1, jj, lda ); Anxt = Mptr( Acur, 0, 1, lda ); /* * Swap and broadcast the current row */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpT( PANEL, ii, jj, WORK ); /* * Scale current column by its absolute value max entry - Update trai- * ling sub-matrix and find local absolute value max in next column (On- * ly one pass through cache for each current column). This sequence of * operations could benefit from a specialized blocked implementation. 
*/ if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Acur, 1 ); HPL_daxpy( Mm1, -(*(Mptr( L1, jj+1, jj, n0 ))), Acur, 1, Anxt, 1 ); HPL_dlocmax( PANEL, Mm1, iip1, jj+1, WORK ); if( Nm1 > 1 ) { #ifdef HPL_CALL_VSIPL /* * Create the matrix subviews */ Av1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj+2, Mm1, Nm1-1 ); Xv1 = vsip_msubview_d( Av0, PANEL->ii+iip1, PANEL->jj+jj, Mm1, 1 ); Yv1 = vsip_msubview_d( Yv0, jj+2, jj, Nm1-1, 1 ); vsip_gemp_d( -HPL_rone, Xv1, VSIP_MAT_NTRANS, Yv1, VSIP_MAT_TRANS, HPL_rone, Av1 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Yv1 ); (void) vsip_mdestroy_d( Xv1 ); (void) vsip_mdestroy_d( Av1 ); #else HPL_dger( HplColumnMajor, Mm1, Nm1-1, -HPL_rone, Acur, 1, Mptr( L1, jj+2, jj, n0 ), 1, Mptr( Anxt, 0, 1, lda ), lda ); #endif } if( curr != 0 ) { ii = iip1; iip1++; m = Mm1; Mm1--; } Nm1--; jj++; } /* * Swap and broadcast last row - Scale last column by its absolute value * max entry */ HPL_pdmxswp( PANEL, m, ii, jj, WORK ); HPL_dlocswpT( PANEL, ii, jj, WORK ); if( WORK[0] != HPL_rzero ) HPL_dscal( Mm1, HPL_rone / WORK[0], Mptr( A, iip1, jj, lda ), 1 ); #ifdef HPL_CALL_VSIPL /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Yv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Yv0 ); (void) vsip_mdestroy_d( Av0 ); #endif #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PFACT ); #endif /* * End of HPL_pdpanrlT */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdrpancrN.c0000644000000000000000000002612311256503657014604 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpancrN ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpancrN ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpancrN HPL_pdrpancrN recursively factorizes a panel of columns using the * recursive Crout variant of the usual one-dimensional algorithm. The * lower triangular N0-by-N0 upper block of the panel is stored in * no-transpose form (i.e. just like the input matrix itself). * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). 
* * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. * First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. */ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. 
*/ do { n -= jb; ioff = ICOFF + jj; /* * Local update - Factor current panel - Replicated update and solve */ #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jb ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); } Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), lda ); #endif HPL_pdrpancrN( PANEL, m, jb, ioff, WORK ); if( n > 0 ) { #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ Av1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); Av2 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff+jb, jj, n ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, HPL_rone, Av2 
); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, jb, n, jj, -HPL_rone, Mptr( L1ptr, jj, 0, n0 ), n0, Mptr( L1ptr, 0, jj+jb, n0 ), n0, HPL_rone, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); #endif HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); } /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); ii += jb; m -= jb; } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpancrN */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdrpancrT.c0000644000000000000000000002602511256503657014613 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpancrT ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpancrT ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpancrT recursively factorizes a panel of columns using the * recursive Crout variant of the usual one-dimensional algorithm. * The lower triangular N0-by-N0 upper block of the panel is stored in * transpose form. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. 
This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. 
* First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. */ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. 
*/ do { n -= jb; ioff = ICOFF + jj; /* * Local update - Factor current panel - Replicated update and solve */ #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jb ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); } Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), lda ); #endif HPL_pdrpancrT( PANEL, m, jb, ioff, WORK ); if( n > 0 ) { #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ Av1 = vsip_msubview_d( Lv0, ioff+jb, ICOFF, n, jj ); Av2 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, HPL_rone, Av2 ); 
/* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, n, jb, jj, -HPL_rone, Mptr( L1ptr, jj+jb, 0, n0 ), n0, Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); #endif HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); } /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); ii += jb; m -= jb; } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpancrT */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdrpanllN.c0000644000000000000000000002356311256503657014614 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpanllN ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpanllN ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpanllN recursively factorizes a panel of columns using the * recursive Left-looking variant of the one-dimensional algorithm. The * lower triangular N0-by-N0 upper block of the panel is stored in * no-transpose form (i.e. just like the input matrix itself). 
* * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. 
To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. * First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. */ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. 
*/ do { n -= jb; ioff = ICOFF + jj; /* * Replicated solve - Local update - Factor current panel */ HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jj, jb, HPL_rone, L1ptr, n0, Mptr( L1ptr, 0, jj, n0 ), n0 ); #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jj ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); } Lv1 = vsip_msubview_d( Lv0, ICOFF, ioff, jj, jb ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, jb, jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, 0, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), lda ); #endif HPL_pdrpanllN( PANEL, m, jb, ioff, WORK ); /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); ii += jb; m -= jb; } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpanllN */ } 
hpcc-1.4.1/hpl/src/pfact/HPL_pdrpanllT.c0000644000000000000000000002351011256503657014612 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpanllT ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpanllT ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpanllT recursively factorizes a panel of columns using the * recursive Left-looking variant of the one-dimensional algorithm. The * lower triangular N0-by-N0 upper block of the panel is stored in * transpose form. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. 
On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. * First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. 
*/ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. */ do { n -= jb; ioff = ICOFF + jj; /* * Replicated solve - Local update - Factor current panel */ HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, HplUnit, jb, jj, HPL_rone, L1ptr, n0, Mptr( L1ptr, jj, 0, n0 ), n0 ); #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jj ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ICOFF, m, jj ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jj ); } Lv1 = vsip_msubview_d( Lv0, ioff, ICOFF, jb, jj ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av1 ); (void) vsip_mdestroy_d( Av2 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix 
views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, jb, jj, -HPL_rone, Mptr( Aptr, ii, 0, lda ), lda, Mptr( L1ptr, jj, 0, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj, lda ), lda ); #endif HPL_pdrpanllT( PANEL, m, jb, ioff, WORK ); /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); ii += jb; m -= jb; } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpanllT */ } hpcc-1.4.1/hpl/src/pfact/HPL_pdrpanrlN.c0000644000000000000000000002365311256503657014622 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpanrlN ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpanrlN ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpanrlN recursively factorizes a panel of columns using the * recursive Right-looking variant of the one-dimensional algorithm. The * lower triangular N0-by-N0 upper block of the panel is stored in * no-transpose form (i.e. just like the input matrix itself). * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. 
On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. 
* First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. */ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. 
*/ do { n -= jb; ioff = ICOFF + jj; /* * Factor current panel - Replicated solve - Local update */ HPL_pdrpanrlN( PANEL, m, jb, ioff, WORK ); HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, n, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), n0, Mptr( L1ptr, jj, jj+jb, n0 ), n0 ); if( curr != 0 ) { ii += jb; m -= jb; } #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jb ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, m, n ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); } Lv1 = vsip_msubview_d( Lv0, ioff, ioff+jb, jb, n ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_NTRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, m, n, jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, Mptr( L1ptr, jj, jj+jb, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj+jb, lda ), lda ); #endif /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlacpy( ioff, jb, Mptr( L1, 0, ioff, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpanrlN */ } 
hpcc-1.4.1/hpl/src/pfact/HPL_pdrpanrlT.c0000644000000000000000000002357711256503657014635 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdrpanrlT ( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK ) #else void HPL_pdrpanrlT ( PANEL, M, N, ICOFF, WORK ) HPL_T_panel * PANEL; const int M; const int N; const int ICOFF; double * WORK; #endif { /* * Purpose * ======= * * HPL_pdrpanrlT recursively factorizes a panel of columns using the * recursive Right-looking variant of the one-dimensional algorithm. The * lower triangular N0-by-N0 upper block of the panel is stored in * transpose form. * * Bi-directional exchange is used to perform the swap::broadcast * operations at once for one column in the panel. This results in a * lower number of slightly larger messages than usual. On P processes * and assuming bi-directional links, the running time of this function * can be approximated by (when N is equal to N0): * * N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + * N0^2 * ( M - N0/3 ) * gam2-3 * * where M is the local number of rows of the panel, lat and bdwth are * the latency and bandwidth of the network for double precision real * words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS * rate of execution. The recursive algorithm allows indeed to almost * achieve Level 3 BLAS performance in the panel factorization. 
On a * large number of modern machines, this operation is however latency * bound, meaning that its cost can be estimated by only the latency * portion N0 * log_2(P) * lat. Mono-directional links will double this * communication cost. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * M (local input) const int * On entry, M specifies the local number of rows of sub(A). * * N (local input) const int * On entry, N specifies the local number of columns of sub(A). * * ICOFF (global input) const int * On entry, ICOFF specifies the row and column offset of sub(A) * in A. * * WORK (local workspace) double * * On entry, WORK is a workarray of size at least 2*(4+2*N0). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * Aptr, * L1, * L1ptr; #ifdef HPL_CALL_VSIPL vsip_mview_d * Av0, * Lv0, * Av1, * Av2, * Lv1; #endif int curr, ii, ioff, jb, jj, lda, m, n, n0, nb, nbdiv, nbmin; /* .. * .. Executable Statements .. */ if( N <= ( nbmin = PANEL->algo->nbmin ) ) { PANEL->algo->pffun( PANEL, M, N, ICOFF, WORK ); return; } /* * Find new recursive blocking factor. To avoid an infinite loop, one * must guarantee: 1 <= jb < N, knowing that N is greater than NBMIN. * First, we compute nblocks: the number of blocks of size NBMIN in N, * including the last one that may be smaller. nblocks is thus larger * than or equal to one, since N >= NBMIN. * The ratio ( nblocks + NDIV - 1 ) / NDIV is thus larger than or equal * to one as well. For NDIV >= 2, we are guaranteed that the quan- * tity ( ( nblocks + NDIV - 1 ) / NDIV ) * NBMIN is less than N and * greater than or equal to NBMIN. 
*/ nbdiv = PANEL->algo->nbdiv; ii = jj = 0; m = M; n = N; nb = jb = ( (((N+nbmin-1) / nbmin) + nbdiv - 1) / nbdiv ) * nbmin; A = PANEL->A; lda = PANEL->lda; L1 = PANEL->L1; n0 = PANEL->jb; L1ptr = Mptr( L1, ICOFF, ICOFF, n0 ); curr = (int)( PANEL->grid->myrow == PANEL->prow ); if( curr != 0 ) Aptr = Mptr( A, ICOFF, ICOFF, lda ); else Aptr = Mptr( A, 0, ICOFF, lda ); /* * The triangular solve is replicated in every process row. The panel * factorization is such that the first rows of A are accumulated in * every process row during the (panel) swapping phase. We ensure this * way a minimum amount of communication during the entire panel facto- * rization. */ do { n -= jb; ioff = ICOFF + jj; /* * Factor current panel - Replicated solve - Local update */ HPL_pdrpanrlT( PANEL, m, jb, ioff, WORK ); HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans, HplUnit, n, jb, HPL_rone, Mptr( L1ptr, jj, jj, n0 ), n0, Mptr( L1ptr, jj+jb, jj, n0 ), n0 ); if( curr != 0 ) { ii += jb; m -= jb; } #ifdef HPL_CALL_VSIPL /* * Admit the blocks */ (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE ); (void) vsip_blockadmit_d( PANEL->L1block, VSIP_TRUE ); /* * Create the matrix views */ Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq ); Lv0 = vsip_mbind_d( PANEL->L1block, 0, 1, n0, n0, n0 ); /* * Create the matrix subviews */ if( curr != 0 ) { Av1 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff, m, jb ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ICOFF+ii, PANEL->jj+ioff+jb, m, N ); } else { Av1 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff, m, jb ); Av2 = vsip_msubview_d( Av0, PANEL->ii+ii, PANEL->jj+ioff+jb, m, n ); } Lv1 = vsip_msubview_d( Lv0, ioff+jb, ioff, n, jb ); vsip_gemp_d( -HPL_rone, Av1, VSIP_MAT_NTRANS, Lv1, VSIP_MAT_TRANS, HPL_rone, Av2 ); /* * Destroy the matrix subviews */ (void) vsip_mdestroy_d( Lv1 ); (void) vsip_mdestroy_d( Av2 ); (void) vsip_mdestroy_d( Av1 ); /* * Release the blocks */ (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 
), VSIP_TRUE ); (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE ); /* * Destroy the matrix views */ (void) vsip_mdestroy_d( Lv0 ); (void) vsip_mdestroy_d( Av0 ); #else HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, m, n, jb, -HPL_rone, Mptr( Aptr, ii, jj, lda ), lda, Mptr( L1ptr, jj+jb, jj, n0 ), n0, HPL_rone, Mptr( Aptr, ii, jj+jb, lda ), lda ); #endif /* * Copy back upper part of A in current process row - Go the next block */ if( curr != 0 ) { HPL_dlatcpy( ioff, jb, Mptr( L1, ioff, 0, n0 ), n0, Mptr( A, 0, ioff, lda ), lda ); } jj += jb; jb = Mmin( n, nb ); } while( n > 0 ); /* * End of HPL_pdrpanrlT */ } hpcc-1.4.1/hpl/src/pgesv/HPL_equil.c0000644000000000000000000002461211256503657014024 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_equil ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const enum HPL_TRANS TRANS, const int N, double * U, const int LDU, int * IPLEN, const int * IPMAP, const int * IPMAPM1, int * IWORK ) #else void HPL_equil ( PBCST, IFLAG, PANEL, TRANS, N, U, LDU, IPLEN, IPMAP, IPMAPM1, IWORK ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const enum HPL_TRANS TRANS; const int N; double * U; const int LDU; int * IPLEN; const int * IPMAP; const int * IPMAPM1; int * IWORK; #endif { /* * Purpose * ======= * * HPL_equil equilibrates the local pieces of U, so that on exit to * this function, pieces of U contained in every process row are of the * same size. This phase makes the rolling phase optimal. In addition, * this function probes for the column panel L and forwards it when * possible. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. 
* * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel (to be equilibrated) information. * * TRANS (global input) const enum HPL_TRANS * On entry, TRANS specifies whether U is stored in transposed * or non-transposed form. * * N (local input) const int * On entry, N specifies the number of rows or columns of U. N * must be at least 0. * * U (local input/output) double * * On entry, U is an array of dimension (LDU,*) containing the * local pieces of U in each process row. * * LDU (local input) const int * On entry, LDU specifies the local leading dimension of U. LDU * should be at least MAX(1,IPLEN[nprow]) when U is stored in * non-transposed form, and MAX(1,N) otherwise. * * IPLEN (global input) int * * On entry, IPLEN is an array of dimension NPROW+1. This array * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U * in process IPMAP[i]. * * IPMAP (global input) const int * * On entry, IPMAP is an array of dimension NPROW. This array * contains the logarithmic mapping of the processes. In other * words, IPMAP[myrow] is the absolute coordinate of the sorted * process. * * IPMAPM1 (global input) const int * * On entry, IPMAPM1 is an array of dimension NPROW. This array * contains the inverse of the logarithmic mapping contained in * IPMAP: For i in [0.. NPROCS) IPMAPM1[IPMAP[i]] = i. * * IWORK (workspace) int * * On entry, IWORK is a workarray of dimension NPROW+1. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int i, ip, ipU, ipcur, iprow, iptgt, lastrow, left, npm1, nprow, ll, llU, llcur, lltgt, right, slen, smax, smin; /* .. * .. Executable Statements .. 
*/ if( ( npm1 = ( nprow = PANEL->grid->nprow ) - 1 ) <= 1 ) return; /* * If the current distribution of the pieces of U is already optimal for * the rolling phase, then return imediately. The optimal distribution * is such that ip processes have smax items and the remaining processes * only have smin items. Another way to check this is to verify that all * differences IPLEN[i+1] - IPLEN[i] are either smin or smax. */ smax = ( ( slen = IPLEN[nprow] ) + npm1 ) / nprow; ip = slen - nprow * ( smin = slen / nprow ); iprow = 0; do { ll = IPLEN[iprow+1] - IPLEN[iprow]; iprow++; } while( ( iprow < nprow ) && ( ( ll == smin ) || ( ll == smax ) ) ); if( iprow == nprow ) return; /* * Now, we are sure the distribution of the pieces of U is not optimal * with respect to the rolling phase, thus perform equilibration. Go * through the list of processes: Processes that have rows that do not * belong to them with respect to the optimal mapping spread them in a * logarithmic fashion. To simplify a little bit the implementation, and * mainly the packing, a source process row spreads its data to its left * first, and then to its right. 
*/ IWORK[nprow] = slen; for( iprow = 0; iprow < nprow; iprow++ ) { llU = IPLEN[iprow+1] - ( ipU = IPLEN[iprow] ); if( iprow < ip ) { lltgt = smax; iptgt = iprow * smax; } else { lltgt = smin; iptgt = iprow * smin + ip; } left = ( ipU < iptgt ); right = ( iptgt + lltgt < ipU + llU ); /* * If I have something to spread to either the left or the right */ if( ( llU > 0 ) && ( left || right ) ) { /* Figure out how much every other process should have */ ipcur = ipU; llcur = llU; for( i = 0; i < nprow; i++ ) { if( i < ip ) { lltgt = smax; iptgt = i * smax; } else { lltgt = smin; iptgt = i * smin + ip; } lastrow = iptgt + lltgt - 1; if( ( lastrow >= ipcur ) && ( llcur > 0 ) ) { ll = lastrow - ipcur + 1; ll = Mmin( ll, llcur ); llcur -= ll; } else { ll = 0; } IWORK[i] = ipcur; ipcur += ll; IWORK[i+1] = ipcur; } /* * Equilibration phase */ if( TRANS == HplNoTrans ) { if( left ) { HPL_spreadN( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, iprow, IWORK, IPMAP, IPMAPM1 ); } if( right ) { HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, iprow, IWORK, IPMAP, IPMAPM1 ); } } else { if( left ) { HPL_spreadT( PBCST, IFLAG, PANEL, HplLeft, N, U, LDU, iprow, IWORK, IPMAP, IPMAPM1 ); } if( right ) { HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, N, U, LDU, iprow, IWORK, IPMAP, IPMAPM1 ); } } } } /* * Finally update IPLEN with the indexes corresponding to the new dis- * tribution of U - IPLEN[nprow] remained unchanged. */ for( i = 0; i < nprow; i++ ) IPLEN[i] = ( i < ip ? i*smax : i*smin + ip ); /* * End of HPL_equil */ } hpcc-1.4.1/hpl/src/pgesv/HPL_logsort.c0000644000000000000000000001714511256503657014401 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_logsort ( const int NPROCS, const int ICURROC, int * IPLEN, int * IPMAP, int * IPMAPM1 ) #else void HPL_logsort ( NPROCS, ICURROC, IPLEN, IPMAP, IPMAPM1 ) const int NPROCS; const int ICURROC; int * IPLEN; int * IPMAP; int * IPMAPM1; #endif { /* * Purpose * ======= * * HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that * contain the logarithmic sorted processes id with repect to the local * number of rows of U that they own. This is necessary to ensure that * the logarithmic spreading of U is optimal in terms of number of steps * and communication volume as well. In other words, the larget pieces * of U will be sent a minimal number of times. * * Arguments * ========= * * NPROCS (global input) const int * On entry, NPROCS specifies the number of process rows in the * process grid. NPROCS is at least one. * * ICURROC (global input) const int * On entry, ICURROC is the source process row. * * IPLEN (global input/output) int * * On entry, IPLEN is an array of dimension NPROCS+1, such that * IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U, * that process i-1 has. On exit, IPLEN[i] is the number of * rows of U in the processes before process IPMAP[i] after the * sort, with the convention that IPLEN[NPROCS] is the total * number of rows of the panel. In other words, IPLEN[i+1] - * IPLEN[i] is the number of rows of A that should be moved to * the process IPMAP[i]. IPLEN is such that the number of rows * of the source process row is IPLEN[1] - IPLEN[0], and the * remaining entries of this array are sorted so that the * quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted. * * IPMAP (global output) int * * On entry, IPMAP is an array of dimension NPROCS. On exit, * array contains the logarithmic mapping of the processes. 
In * other words, IPMAP[myroc] is the corresponding sorted process * coordinate. * * IPMAPM1 (global output) int * * On entry, IPMAPM1 is an array of dimension NPROCS. On exit, * this array contains the inverse of the logarithmic mapping * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in * [0.. NPROCS) * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int dist, i, ip, iplen_i, iplen_j, itmp, j, k; /* .. * .. Executable Statements .. */ /* * Compute the logarithmic distance between process j and process 0, as * well as the maximum logarithmic distance. IPMAPM1 is workarray here. */ for( j = 0, dist = 0; j < NPROCS; j++ ) { IPMAP[j] = MModAdd( j, ICURROC, NPROCS ); ip = j; itmp = 0; do { if( ip & 1 ) itmp++; ip >>= 1; } while ( ip ); IPMAPM1[j] = itmp; if( itmp > dist ) dist = itmp; } /* * Shift IPLEN[1..NPROCS] of ICURROC places, so that IPLEN[1] is now * what used to be IPLEN[ICURROC+1]. Initialize IPMAP, so that IPMAP[0] * is ICURROC. */ for( j = 0; j < ICURROC; j++ ) { for( i = 2, itmp = IPLEN[1]; i <= NPROCS; i++ ) IPLEN[i-1] = IPLEN[i]; IPLEN[NPROCS] = itmp; } /* * logarithmic sort */ for( k = 1; k <= dist; k++ ) { for( j = 1; j < NPROCS; j++ ) { if( IPMAPM1[j] == k ) { for( i = 2; i < NPROCS; i++ ) { if( k < IPMAPM1[i] ) { iplen_i = IPLEN[i+1]; iplen_j = IPLEN[j+1]; if( iplen_j < iplen_i ) { IPLEN[j+1] = iplen_i; IPLEN[i+1] = iplen_j; itmp = IPMAP[j]; IPMAP[j] = IPMAP[i]; IPMAP[i] = itmp; } } } } } } /* * Compute IPLEN and IPMAPM1 (the inverse of IPMAP) */ IPLEN[0] = 0; for( i = 0; i < NPROCS; i++ ) { IPMAPM1[ IPMAP[i] ] = i; IPLEN[i+1] += IPLEN[i]; } /* * End of HPL_logsort */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdgesv.c0000644000000000000000000001145311256503657014174 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdgesv ( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A ) #else void HPL_pdgesv ( GRID, ALGO, A ) HPL_T_grid * GRID; HPL_T_palg * ALGO; HPL_T_pmat * A; #endif { /* * Purpose * ======= * * HPL_pdgesv factors a N+1-by-N matrix using LU factorization with row * partial pivoting. The main algorithm is the "right looking" variant * with or without look-ahead. The lower triangular factor is left * unpivoted and the pivots are not returned. The right hand side is the * N+1 column of the coefficient matrix. * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters. * * A (local input/output) HPL_T_pmat * * On entry, A points to the data structure containing the local * array information. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. */ if( A->n <= 0 ) return; A->info = 0; if( ( ALGO->depth == 0 ) || ( GRID->npcol == 1 ) ) { HPL_pdgesv0( GRID, ALGO, A ); } else { HPL_pdgesvK2( GRID, ALGO, A ); } /* * Solve upper triangular system */ if( A->info == 0 ) HPL_pdtrsv( GRID, A ); /* * End of HPL_pdgesv */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdgesv0.c0000644000000000000000000001377711256503657014267 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdgesv0 ( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A ) #else void HPL_pdgesv0 ( GRID, ALGO, A ) HPL_T_grid * GRID; HPL_T_palg * ALGO; HPL_T_pmat * A; #endif { /* * Purpose * ======= * * HPL_pdgesv0 factors a N+1-by-N matrix using LU factorization with row * partial pivoting. The main algorithm is the "right looking" variant * without look-ahead. The lower triangular factor is left unpivoted and * the pivots are not returned. The right hand side is the N+1 column of * the coefficient matrix. * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters. * * A (local input/output) HPL_T_pmat * * On entry, A points to the data structure containing the local * array information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ HPL_T_panel * * panel = NULL; HPL_T_UPD_FUN HPL_pdupdate; int N, j, jb, n, nb, tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; /* .. * .. Executable Statements .. 
*/ if( ( N = A->n ) <= 0 ) return; HPL_pdupdate = ALGO->upfun; nb = A->nb; /* * Allocate a panel list of length 1 - Allocate panel[0] resources */ panel = (HPL_T_panel **)malloc( sizeof( HPL_T_panel * ) ); if( panel == NULL ) { HPL_pabort( __LINE__, "HPL_pdgesv0", "Memory allocation failed" ); } HPL_pdpanel_new( GRID, ALGO, N, N+1, Mmin( N, nb ), A, 0, 0, tag, &panel[0] ); /* * Loop over the columns of A */ for( j = 0; j < N; j += nb ) { n = N - j; jb = Mmin( n, nb ); /* * Release panel resources - re-initialize panel data structure */ (void) HPL_pdpanel_free( panel[0] ); HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[0] ); /* * Factor and broadcast current panel - update */ HPL_pdfact( panel[0] ); (void) HPL_binit( panel[0] ); do { (void) HPL_bcast( panel[0], &test ); } while( test != HPL_SUCCESS ); (void) HPL_bwait( panel[0] ); HPL_pdupdate( NULL, NULL, panel[0], -1 ); /* * Update message id for next factorization */ tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); } /* * Release panel resources and panel list */ (void) HPL_pdpanel_disp( &panel[0] ); if( panel ) free( panel ); /* * End of HPL_pdgesv0 */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdgesvK1.c0000644000000000000000000001770511256503657014376 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdgesvK1
(
   HPL_T_grid *                     GRID,
   HPL_T_palg *                     ALGO,
   HPL_T_pmat *                     A
)
#else
void HPL_pdgesvK1
( GRID, ALGO, A )
   HPL_T_grid *                     GRID;
   HPL_T_palg *                     ALGO;
   HPL_T_pmat *                     A;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdgesvK1 is a "right looking" LU driver with look-ahead. It
 * factors the coefficient matrix with row partial pivoting; the right
 * hand side travels with the matrix as its column N+1. The lower
 * triangular factor is left unpivoted and the pivots are not returned.
 *
 * The routine keeps a list of depth+1 panel pointers (depth is read from
 * ALGO->depth and is expected to be >= 1). The first depth panels are
 * created and factored up front. In the main loop a fresh panel is
 * created in slot [depth] at every step, the oldest panel (slot [0]) is
 * disposed of once its update has been applied, and the remaining
 * pointers are shifted down by one.
 *
 * Arguments
 * =========
 *
 * GRID    (local input)                 HPL_T_grid *
 *         Process grid information; GRID->mycol and GRID->npcol are
 *         read here, the structure is also forwarded to the panel
 *         routines.
 *
 * ALGO    (global input)                HPL_T_palg *
 *         Algorithmic parameters; ALGO->depth is the look-ahead depth
 *         and ALGO->upfun the update routine.
 *
 * A       (local input/output)          HPL_T_pmat *
 *         Local array information. A->n is the problem size N and A->nb
 *         the blocking factor. Nothing is done when A->n <= 0.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   HPL_T_panel                * * plist = NULL;
   HPL_T_UPD_FUN              update;
   int                        N, nb, depth, mycol, npcol;
   int                        icurcol = 0;
   int                        jj = 0;  /* accumulated but never read here */
   int                        j, jb, jstart, k, n, nn, nq;
   int                        msgid = MSGID_BEGIN_FACT;
   int                        bstat = HPL_KEEP_TESTING;
/* ..
 * .. Executable Statements ..
 */
   mycol  = GRID->mycol;
   npcol  = GRID->npcol;
   depth  = ALGO->depth;
   update = ALGO->upfun;
   N      = A->n;
   nb     = A->nb;

   if( N <= 0 ) return;
/*
 * The panel list has depth + 1 slots (depth >= 1)
 */
   plist = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( *plist ) );
   if( plist == NULL )
   {
      HPL_pabort( __LINE__, "HPL_pdgesvK1", "Memory allocation failed" );
   }
/*
 * Create the first depth panels. nq starts from HPL_numroc( N+1, ... )
 * and is decremented by jb each time this process column owns the panel
 * just created.
 */
   nq     = HPL_numroc( N+1, nb, nb, mycol, 0, npcol );
   nn     = N;
   jstart = 0;

   k = 0;
   while( k < depth )
   {
      jb = Mmin( nn, nb );
      HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart,
                       msgid, &plist[k] );
      nn     -= jb;
      jstart += jb;

      if( mycol == icurcol )
      {
         jj += jb;
         nq -= jb;
      }
      icurcol = MModAdd1( icurcol, npcol );
      msgid   = MNxtMgid( msgid, MSGID_BEGIN_FACT, MSGID_END_FACT );

      k++;
   }
/*
 * Prime the look-ahead pipeline: factor and broadcast panel[0..depth-1],
 * which together cover the first jstart columns.
 */
   j = 0;
   k = 0;
   while( k < depth )
   {
      jb = jstart - j;
      jb = Mmin( jb, nb );
      j += jb;

      HPL_pdfact( plist[k] );
      (void) HPL_binit( plist[k] );
      for( ; ; )
      {
         (void) HPL_bcast( plist[k], &bstat );
         if( bstat == HPL_SUCCESS ) break;
      }
      (void) HPL_bwait( plist[k] );
/*
 * Every panel but the last one partially updates the look-ahead columns
 * that lie in front of it
 */
      if( k < depth - 1 )
      {
         nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol );
         update( NULL, NULL, plist[k], nn );
      }

      k++;
   }
/*
 * Main loop over the remaining columns of A
 */
   j = jstart;
   while( j < N )
   {
      n  = N - j;
      jb = Mmin( n, nb );
/*
 * A new panel goes into the last slot
 */
      HPL_pdpanel_new( GRID, ALGO, n, n+1, jb, A, j, j, msgid,
                       &plist[depth] );
/*
 * Only the process column owning the new panel brings its nn local
 * columns up to date with all pending panels and then factors it;
 * everybody else contributes nn = 0 columns.
 */
      nn = 0;
      if( mycol == icurcol )
      {
         nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol );
         for( k = 0; k < depth; k++ )
         {
            update( NULL, NULL, plist[k], nn );
         }
         HPL_pdfact( plist[depth] );
      }
/*
 * Finish the oldest update on the remaining nq-nn columns while the new
 * panel is being broadcast
 */
      (void) HPL_binit( plist[depth] );
      update( plist[depth], &bstat, plist[0], nq-nn );
      (void) HPL_bwait( plist[depth] );
/*
 * The oldest panel is done: release it and shift the list down by one
 */
      (void) HPL_pdpanel_disp( &plist[0] );
      k = 0;
      while( k < depth )
      {
         plist[k] = plist[k+1];
         k++;
      }
/*
 * Advance to the next process column and message id
 */
      if( mycol == icurcol )
      {
         jj += jb;
         nq -= jb;
      }
      icurcol = MModAdd1( icurcol, npcol );
      msgid   = MNxtMgid( msgid, MSGID_BEGIN_FACT, MSGID_END_FACT );

      j += nb;
   }
/*
 * Clean-up: apply the outstanding updates, then release the panels and
 * the list
 */
   nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol );
   for( k = 0; k < depth; k++ )
   {
      update( NULL, NULL, plist[k], nn );
      (void) HPL_pdpanel_disp( &plist[k] );
   }

   free( plist );
/*
 * End of HPL_pdgesvK1
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_pdgesvK2.c0000644000000000000000000002037111256503657014370 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P.
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdgesvK2 ( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A ) #else void HPL_pdgesvK2 ( GRID, ALGO, A ) HPL_T_grid * GRID; HPL_T_palg * ALGO; HPL_T_pmat * A; #endif { /* * Purpose * ======= * * HPL_pdgesvK2 factors a N+1-by-N matrix using LU factorization with row * partial pivoting. The main algorithm is the "right looking" variant * with look-ahead. The lower triangular factor is left unpivoted and * the pivots are not returned. The right hand side is the N+1 column of * the coefficient matrix. * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters. * * A (local input/output) HPL_T_pmat * * On entry, A points to the data structure containing the local * array information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ HPL_T_panel * p, * * panel = NULL; HPL_T_UPD_FUN HPL_pdupdate; int N, depth, icurcol=0, j, jb, jj=0, jstart, k, mycol, n, nb, nn, npcol, nq, tag=MSGID_BEGIN_FACT, test=HPL_KEEP_TESTING; /* .. * .. Executable Statements .. 
*/ mycol = GRID->mycol; npcol = GRID->npcol; depth = ALGO->depth; HPL_pdupdate = ALGO->upfun; N = A->n; nb = A->nb; if( N <= 0 ) return; /* * Allocate a panel list of length depth + 1 (depth >= 1) */ panel = (HPL_T_panel **)malloc( (size_t)(depth+1) * sizeof( HPL_T_panel *) ); if( panel == NULL ) { HPL_pabort( __LINE__, "HPL_pdgesvK2", "Memory allocation failed" ); } /* * Create and initialize the first depth panels */ nq = HPL_numroc( N+1, nb, nb, mycol, 0, npcol ); nn = N; jstart = 0; for( k = 0; k < depth; k++ ) { jb = Mmin( nn, nb ); HPL_pdpanel_new( GRID, ALGO, nn, nn+1, jb, A, jstart, jstart, tag, &panel[k] ); nn -= jb; jstart += jb; if( mycol == icurcol ) { jj += jb; nq -= jb; } icurcol = MModAdd1( icurcol, npcol ); tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); } /* * Create last depth+1 panel */ HPL_pdpanel_new( GRID, ALGO, nn, nn+1, Mmin( nn, nb ), A, jstart, jstart, tag, &panel[depth] ); tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); /* * Initialize the lookahead - Factor jstart columns: panel[0..depth-1] */ for( k = 0, j = 0; k < depth; k++ ) { jb = jstart - j; jb = Mmin( jb, nb ); j += jb; /* * Factor and broadcast k-th panel */ HPL_pdfact( panel[k] ); (void) HPL_binit( panel[k] ); do { (void) HPL_bcast( panel[k], &test ); } while( test != HPL_SUCCESS ); (void) HPL_bwait( panel[k] ); /* * Partial update of the depth-k-1 panels in front of me */ if( k < depth - 1 ) { nn = HPL_numrocI( jstart-j, j, nb, nb, mycol, 0, npcol ); HPL_pdupdate( NULL, NULL, panel[k], nn ); } } /* * Main loop over the remaining columns of A */ for( j = jstart; j < N; j += nb ) { n = N - j; jb = Mmin( n, nb ); /* * Initialize current panel - Finish latest update, Factor and broadcast * current panel */ (void) HPL_pdpanel_free( panel[depth] ); HPL_pdpanel_init( GRID, ALGO, n, n+1, jb, A, j, j, tag, panel[depth] ); if( mycol == icurcol ) { nn = HPL_numrocI( jb, j, nb, nb, mycol, 0, npcol ); for( k = 0; k < depth; k++ ) /* partial updates 0..depth-1 */ (void) 
HPL_pdupdate( NULL, NULL, panel[k], nn ); HPL_pdfact( panel[depth] ); /* factor current panel */ } else { nn = 0; } /* Finish the latest update and broadcast the current panel */ (void) HPL_binit( panel[depth] ); HPL_pdupdate( panel[depth], &test, panel[0], nq-nn ); (void) HPL_bwait( panel[depth] ); /* * Circular of the panel pointers: * xtmp = x[0]; for( k=0; k < depth; k++ ) x[k] = x[k+1]; x[d] = xtmp; * * Go to next process row and column - update the message ids for broadcast */ p = panel[0]; for( k = 0; k < depth; k++ ) panel[k] = panel[k+1]; panel[depth] = p; if( mycol == icurcol ) { jj += jb; nq -= jb; } icurcol = MModAdd1( icurcol, npcol ); tag = MNxtMgid( tag, MSGID_BEGIN_FACT, MSGID_END_FACT ); } /* * Clean-up: Finish updates - release panels and panel list */ nn = HPL_numrocI( 1, N, nb, nb, mycol, 0, npcol ); for( k = 0; k < depth; k++ ) { (void) HPL_pdupdate( NULL, NULL, panel[k], nn ); (void) HPL_pdpanel_disp( &panel[k] ); } (void) HPL_pdpanel_disp( &panel[depth] ); if( panel ) free( panel ); /* * End of HPL_pdgesvK2 */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdlaswp00N.c0000644000000000000000000004141311256503657014633 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdlaswp00N
(
   HPL_T_panel *                    PBCST,
   int *                            IFLAG,
   HPL_T_panel *                    PANEL,
   const int                        NN
)
#else
void HPL_pdlaswp00N
( PBCST, IFLAG, PANEL, NN )
   HPL_T_panel *                    PBCST;
   int *                            IFLAG;
   HPL_T_panel *                    PANEL;
   const int                        NN;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdlaswp00N applies the NB row interchanges to NN columns of the
 * trailing submatrix and broadcasts a column panel.
 *
 * Bi-directional exchange is used to perform the swap :: broadcast of
 * the row panel U at once, resulting in a lower number of messages than
 * usual as well as a lower communication volume. With P process rows and
 * assuming bi-directional links, the running time of this function can
 * be approximated by:
 *
 *    log_2(P) * (lat + NB*LocQ(N) / bdwth)
 *
 * where NB is the number of rows of the row panel U, N is the global
 * number of columns being updated, lat and bdwth are the latency and
 * bandwidth of the network for double precision real words. Mono
 * directional links will double this communication cost.
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         On entry, PBCST points to the data structure containing the
 *         panel (to be broadcast) information. It is only ever passed
 *         on to HPL_bcast by this routine.
 *
 * IFLAG   (local input/output)          int *
 *         On entry, IFLAG indicates whether or not the broadcast has
 *         already been completed. If not, probing will occur, and the
 *         outcome will be contained in IFLAG on exit. IFLAG is
 *         dereferenced unconditionally whenever there is work to do
 *         (n > 0 and jb > 0), so it must not be NULL in that case.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry, PANEL points to the data structure containing the
 *         panel (to be broadcast and swapped) information.
 *
 * NN      (local input)                 const int
 *         On entry, NN specifies the local number of columns of the
 *         trailing submatrix to be swapped and broadcast starting at
 *         the current position. NN must be at least zero. The number
 *         of columns actually processed is Mmin( NN, PANEL->n ).
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   MPI_Comm                   comm;
   HPL_T_grid                 * grid;
   double                     * A, * U, * W;
   void                       * vptr = NULL;
   int                        * ipID, * lindxA, * lindxAU, * llen,
                              * llen_sv;
   unsigned int               ip2, ip2_=1, ipdist, ipow=1, mask=1,
                              mydist, mydis_;
   int                        Cmsgid=MSGID_BEGIN_PFACT, Np2, align,
                              hdim, i, icurrow, *iflag, ipA, ipW, *ipl,
                              iprow, jb, k, lda, ldW, myrow, n, nprow,
                              partner, root, size_, usize;
/* Leading dimension used for U in every HPL_dlaswp0xN call below */
#define LDU                   jb
/* ..
 * .. Executable Statements ..
 */
   n = Mmin( NN, PANEL->n ); jb = PANEL->jb;
/*
 * Quick return if there is nothing to do
 */
   if( ( n <= 0 ) || ( jb <= 0 ) ) return;
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_LASWP );
#endif
/*
 * Retrieve parameters from the PANEL data structure
 */
   grid    = PANEL->grid;    nprow   = grid->nprow;
   myrow   = grid->myrow;    comm    = grid->col_comm;
   ip2     = (unsigned int)grid->row_ip2; hdim = grid->row_hdim;
   align   = PANEL->algo->align;
   A       = PANEL->A;       U       = PANEL->U;     iflag = PANEL->IWORK;
   lda     = PANEL->lda;     icurrow = PANEL->prow;
   usize   = jb * n;         /* number of doubles exchanged when U is sent */
   ldW     = n + 1;          /* each W column: 1 index entry + n values    */
/*
 * Allocate space for temporary W (ldW * jb), plus align extra doubles
 * so that HPL_PTR can return an adjusted pointer inside the buffer
 */
   vptr = (void*)malloc( ((size_t)(align) +
                          ((size_t)(jb) * (size_t)(ldW))) *
                         sizeof(double) );
   if( vptr == NULL )
   { HPL_pabort( __LINE__, "HPL_pdlaswp00N", "Memory allocation failed" ); }

   W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) );
/*
 * Construct ipID and its local counter parts lindxA, lindxAU - llen is
 * the number of rows/columns that I have in workspace and that I should
 * send. Compute lindx_, ipA, llen if it has not already been done for
 * this panel. All of these arrays are carved out of PANEL->IWORK:
 * iflag[0] is the state flag, followed by ipl (1 int), ipID (4*jb ints),
 * lindxA (2*jb), lindxAU (2*jb), llen (nprow) and llen_sv.
 */
   k = (int)((unsigned int)(jb) << 1);  ipl = iflag + 1; ipID = ipl + 1;
   lindxA   = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k;
   llen     = lindxAU + k; llen_sv = llen + nprow;

   if( *iflag == -1 )    /* no index arrays have been computed so far */
   {
      HPL_pipid(   PANEL,  ipl, ipID );
      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
      *iflag = 0;
   }
   else if( *iflag == 1 ) /* HPL_pdlaswp01N called before: reuse ipID */
   {
      HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv );
      *iflag = 0;
   }
/*
 * Copy the llen_sv into llen - Reset ipA to its correct value. llen is
 * modified during the exchange below, llen_sv is left untouched here.
 */
   ipA = llen_sv[myrow];
   for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; }
/*
 * For i in [0..2*jb),  lindxA[i] is the offset in A of a row that ulti-
 * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ).  In icurrow,
 * we directly pack into U, otherwise we pack into workspace. The first
 * entry of each column packed in workspace is in fact the row or column
 * offset in U where it should go to.
 */
   if( myrow == icurrow )
   {
      HPL_dlaswp01N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
   }
   else
   {
      HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU );
   }
/*
 * Probe for column panel - forward it when available
 */
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
/*
 * Algorithm for bi-directional data exchange:
 *
 * As long as I have not talked to a process that already had the data
 * from icurrow, I will be sending the workspace, otherwise I will be
 * sending U. Note that the columns in workspace contain the local index
 * in U they should go to.
 *
 * If I am receiving from a process that has the data from icurrow, I
 * will be receiving in U, copy the data of U that stays into A, and
 * then the columns I have in workspace into U; otherwise I will be re-
 * ceiving in the remaining workspace. If I am one of those processes
 * that already has the data from icurrow, I will be immediately copying
 * the data I have in my workspace into U.
 *
 * When I receive U, some of U should be copied in my piece of A before
 * I can copy the rows I have in my workspace into U. This information
 * is kept in the lists lindx_: the row lindxAU[i] should be copied in
 * the row lindxA[i] of my piece of A, just as in the reversed initial
 * packing operation. Those rows are thus the first ones in the work ar-
 * ray. After this operation has been performed, I will not need
 * those lindx arrays, and I will always be sending a buffer of size
 * jb x n, or n x jb, that is, U.
 *
 * At every step of the algorithm, it is necessary to update the list
 * llen, so that I can figure out how large the next messages I will be
 * sending/receiving are. It is obvious when I am sending U. It is not
 * otherwise.
 *
 * We choose icurrow to be the source of the bi-directional exchange.
 * This allows the processes in the non-power 2 part to receive U at the
 * first exchange, and then broadcast internally this U so that those
 * processes can grab their piece of A.
 */
   if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; }
   ipW    = ipA;
   /* size_ is the number of process rows beyond the ip2 part of the grid */
   Np2    = ( ( size_ = nprow - ip2 ) != 0 );
   /* my distance from icurrow; all pairing below uses this relative index */
   mydist = (unsigned int)MModSub( myrow, icurrow, nprow );
/*
 * bi-directional exchange:   If nprow is not a power of 2,  proc[i-ip2]
 * receives local data from proc[i]  for all i in [ip2..nprow);  icurrow
 * is the source, these last process indexes are relative to icurrow.
 */
   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
   {
      partner = MModAdd( icurrow, partner, nprow );

      if( mydist == 0 )  /* I am the current row: I send U and recv W */
      {
         (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW,
                          Cmsgid, partner, comm );
         if( llen[partner] > 0 )
            HPL_dlaswp03N( llen[partner], n, U, LDU, W, W+1, ldW );
      }
      else if( mydist == ip2 )
      {                   /* I recv U for later Bcast, I send my W */
         (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
                          Cmsgid, partner, comm );
      }
      else              /* None of us is icurrow, we exchange our Ws */
      {
         if( ( mydist & ip2 ) != 0 )
         {
            (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm );
         }
         else
         {
            /* received columns are appended after the ipW already held */
            (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW,
                             partner, Cmsgid, comm );
            if( llen[partner] > 0 ) ipW += llen[partner];
         }
      }
   }
/*
 * Update llen
 */
   for( i = 1; i < size_; i++ )
   {
      iprow   = MModAdd( icurrow, i,          nprow );
      partner = MModAdd( iprow,   (int)(ip2), nprow );
      llen[ iprow ] += llen[ partner ];
   }
/*
 * Probe for column panel - forward it when available
 */
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
/*
 * power of 2 part of the processes collection:  only processes [0..ip2)
 * are working;  some of them  (mydist >> (k+1) == 0) either send or re-
 * ceive U.  At every step k, k is in [0 .. hdim),  of the algorithm,  a
 * process pair that exchanges U is such that (mydist >> (k+1) == 0).
 * Among those processes,  the ones  that are sending U are such that
 * mydist >> k == 0.
 */
   if( mydist < ip2 )
   {
      k = 0;

      while( k < hdim )
      {
         /* ipow is doubled at the end of every step, so it equals 2^k */
         partner = (int)(mydist ^ ipow);
         partner = MModAdd( icurrow, partner, nprow );
/*
 * Exchange and combine the local results - If I receive U,  then I must
 * copy from U the rows that belong to my piece of A, and then update  U
 * by copying in it the rows I have accumulated in W.  Otherwise, I  re-
 * ceive W.  In this latter case, and I have U,  I shall update my copy of
 * U by copying in it the rows I have accumulated in W.  If I did not
 * have U before, I simply need to update my pointer in W for later use.
 */
         if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 )
         {
            if( ( mydist >> (unsigned int)(k) ) == 0 )
            {
               (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW,
                                ldW ), llen[partner]*ldW, Cmsgid,
                                partner, comm );
               HPL_dlaswp03N( llen[partner], n, U, LDU, Mptr( W, 0, ipW,
                              ldW ), Mptr( W, 1, ipW, ldW ), ldW );
               ipW += llen[partner];
            }
            else
            {
               (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize,
                                Cmsgid, partner, comm );
               HPL_dlaswp04N( ipA, llen[myrow], n, U, LDU, A, lda, W,
                              W+1, ldW, lindxA, lindxAU );
            }
         }
         else
         {
            (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0,
                             ipW, ldW ), llen[partner]*ldW, Cmsgid,
                             partner, comm );
            ipW += llen[partner];
         }
/*
 * Update llen - Go to next process pairs. Each pair is visited once
 * (from its lower-distance member) and both members end up with the
 * sum of their two counts.
 */
         iprow = icurrow; ipdist = 0;
         do
         {
            if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist )
            {
               partner = MModAdd( icurrow, partner, nprow );
               llen[iprow]  += llen[partner];
               llen[partner] = llen[iprow];
            }
            iprow = MModAdd( iprow, 1, nprow ); ipdist++;

         } while( ipdist < ip2 );

         ipow <<= 1; k++;
/*
 * Probe for column panel - forward it when available
 */
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );
      }
   }
   else
   {
/*
 * non power of 2 part of the process collection:  proc[ip2] broadcast U
 * to procs[ip2..nprow) (relatively to icurrow).
 */
      if( size_ > 1 )
      {
         /* grow ip2_ and mask (all-ones below 2*ip2_) to span size_-1 */
         k = size_ - 1;
         while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; }

         root   = MModAdd( icurrow, (int)(ip2), nprow );
         mydis_ = (unsigned int)MModSub( myrow,  root, nprow );

         do
         {
            mask ^= ip2_;

            if( ( mydis_ & mask ) == 0 )
            {
               partner = (int)(mydis_ ^ ip2_);

               if( ( mydis_ & ip2_ ) != 0 )
               {
                  (void) HPL_recv( U, usize, MModAdd( root, partner,
                                   nprow ), Cmsgid, comm );
               }
               else if( partner < size_ )
               {
                  (void) HPL_send( U, usize, MModAdd( root, partner,
                                   nprow ), Cmsgid, comm );
               }
            }
            ip2_ >>= 1;
/*
 * Probe for column panel - forward it when available
 */
            if( *IFLAG == HPL_KEEP_TESTING )
               (void) HPL_bcast( PBCST, IFLAG );

         } while( ip2_ > 0 );
      }
/*
 * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece
 * of A.
 */
      HPL_dlaswp05N( ipA, n, A, lda, U, LDU, lindxA, lindxAU );
   }
/*
 * If nprow is not a power of 2,  proc[i-ip2]  sends  global result  to
 * proc[i] for all i in [ip2..nprow);
 */
   if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) )
   {
      partner = MModAdd( icurrow, partner, nprow );
      if( ( mydist & ip2 ) != 0 )
      { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); }
      else
      { (void) HPL_send( U, usize, partner, Cmsgid, comm ); }
   }
   /* release the raw allocation (vptr), not the adjusted pointer W */
   if( vptr ) free( vptr );
/*
 * Probe for column panel - forward it when available
 */
   if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );

#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_LASWP );
#endif
/*
 * End of HPL_pdlaswp00N
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_pdlaswp00T.c0000644000000000000000000004147411256503657014650 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1.
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdlaswp00T ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN ) #else void HPL_pdlaswp00T ( PBCST, IFLAG, PANEL, NN ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const int NN; #endif { /* * Purpose * ======= * * HPL_pdlaswp00T applies the NB row interchanges to NN columns of the * trailing submatrix and broadcast a column panel. * * Bi-directional exchange is used to perform the swap :: broadcast of * the row panel U at once, resulting in a lower number of messages than * usual as well as a lower communication volume. With P process rows and * assuming bi-directional links, the running time of this function can * be approximated by: * * log_2(P) * (lat + NB*LocQ(N) / bdwth) * * where NB is the number of rows of the row panel U, N is the global * number of columns being updated, lat and bdwth are the latency and * bandwidth of the network for double precision real words. Mono * directional links will double this communication cost. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. * * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel (to be broadcast and swapped) information. * * NN (local input) const int * On entry, NN specifies the local number of columns of the * trailing submatrix to be swapped and broadcast starting at * the current position. NN must be at least zero. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ MPI_Comm comm; HPL_T_grid * grid; double * A, * U, * W; void * vptr = NULL; int * ipID, * lindxA, * lindxAU, * llen, * llen_sv; unsigned int ip2, ip2_=1, ipdist, ipow=1, mask=1, mydist, mydis_; int Cmsgid=MSGID_BEGIN_PFACT, Np2, align, hdim, i, icurrow, *iflag, ipA, ipW, *ipl, iprow, jb, k, lda, ldW, myrow, n, nprow, partner, root, size_, usize; #define LDU n /* .. * .. Executable Statements .. */ n = Mmin( NN, PANEL->n ); jb = PANEL->jb; /* * Quick return if there is nothing to do */ if( ( n <= 0 ) || ( jb <= 0 ) ) return; #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * Retrieve parameters from the PANEL data structure */ grid = PANEL->grid; nprow = grid->nprow; myrow = grid->myrow; comm = grid->col_comm; ip2 = (unsigned int)grid->row_ip2; hdim = grid->row_hdim; align = PANEL->algo->align; A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; lda = PANEL->lda; icurrow = PANEL->prow; usize = jb * n; ldW = n + 1; /* * Allocate space for temporary W (ldW * jb) */ vptr = (void*)malloc( ( (size_t)(align) + ((size_t)(jb) * (size_t)(ldW))) * sizeof(double) ); if( vptr == NULL ) { HPL_pabort( __LINE__, "HPL_pdlaswp00T", "Memory allocation failed" ); } W = (double *)HPL_PTR( vptr, ((size_t)(align) * sizeof(double) ) ); /* * Construct ipID and its local counter parts lindxA, lindxAU - llen is * the number of rows/columns that I have in workspace and that I should * send. 
Compute lindx_, ipA, llen if it has not already been done for * this panel; */ k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; lindxA = ipID + ((unsigned int)(k) << 1); lindxAU = lindxA + k; llen = lindxAU + k; llen_sv = llen + nprow; if( *iflag == -1 ) /* no index arrays have been computed so far */ { HPL_pipid( PANEL, ipl, ipID ); HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); *iflag = 0; } else if( *iflag == 1 ) /* HPL_pdlaswp01T called before: reuse ipID */ { HPL_plindx0( PANEL, *ipl, ipID, lindxA, lindxAU, llen_sv ); *iflag = 0; } /* * Copy the llen_sv into llen - Reset ipA to its correct value */ ipA = llen_sv[myrow]; for( i = 0; i < nprow; i++ ) { llen[i] = llen_sv[i]; } /* * For i in [0..2*jb), lindxA[i] is the offset in A of a row that ulti- * mately goes to U( lindxAU[i], : ) or U( :, lindxAU[i] ). In icurrow, * we directly pack into U, otherwise we pack into workspace. The first * entry of each column packed in workspace is in fact the row or column * offset in U where it should go to. */ if( myrow == icurrow ) { HPL_dlaswp01T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } else { HPL_dlaswp02N( ipA, n, A, lda, W, W+1, ldW, lindxA, lindxAU ); } /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); /* * Algorithm for bi-directional data exchange: * * As long as I have not talked to a process that already had the data * from icurrow, I will be sending the workspace, otherwise I will be * sending U. Note that the columns in workspace contain the local index * in U they should go to. * * If I am receiving from a process that has the data from icurrow, I * will be receiving in U, copy the data of U that stays into A, and * then the columns I have in workspace into U; otherwise I will be re- * ceiving in the remaining workspace. 
If I am one of those processes * that already has the data from icurrow, I will be immediately copying * the data I have in my workspace into U. * * When I receive U, some of U should be copied in my piece of A before * I can copy the rows I have in my workspace into U. This information * is kept in the lists lindx_: the row lindxAU[i] should be copied in * the row lindxA[i] of my piece of A, just as in the reversed initial * packing operation. Those rows are thus the first ones in the work ar- * ray. After this operation has been performed, I will not need * those lindx arrays, and I will always be sending a buffer of size * jb x n, or n x jb, that is, U. * * At every step of the algorithm, it is necesary to update the list * llen, so that I can figure out how large the next messages I will be * sending/receiving are. It is obvious when I am sending U. It is not * otherwise. * * We choose icurrow to be the source of the bi-directional exchange. * This allows the processes in the non-power 2 part to receive U at the * first exchange, and then broadcast internally this U so that those * processes can grab their piece of A. */ if( myrow == icurrow ) { llen[myrow] = 0; ipA = 0; } ipW = ipA; Np2 = ( ( size_ = nprow - ip2 ) != 0 ); mydist = (unsigned int)MModSub( myrow, icurrow, nprow ); /* * bi-directional exchange: If nprow is not a power of 2, proc[i-ip2] * receives local data from proc[i] for all i in [ip2..nprow); icurrow * is the source, these last process indexes are relative to icurrow. 
*/ if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) { partner = MModAdd( icurrow, partner, nprow ); if( mydist == 0 ) /* I am the current row: I send U and recv W */ { (void) HPL_sdrv( U, usize, Cmsgid, W, llen[partner] * ldW, Cmsgid, partner, comm ); if( llen[partner] > 0 ) HPL_dlaswp03T( llen[partner], n, U, LDU, W, W+1, ldW ); } else if( mydist == ip2 ) { /* I recv U for later Bcast, I send my W */ (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, Cmsgid, partner, comm ); } else /* None of us is icurrow, we exchange our Ws */ { if( ( mydist & ip2 ) != 0 ) { (void) HPL_send( W, llen[myrow]*ldW, partner, Cmsgid, comm ); } else { (void) HPL_recv( Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, partner, Cmsgid, comm ); if( llen[partner] > 0 ) ipW += llen[partner]; } } } /* * Update llen */ for( i = 1; i < size_; i++ ) { iprow = MModAdd( icurrow, i, nprow ); partner = MModAdd( iprow, (int)(ip2), nprow ); llen[ iprow ] += llen[ partner ]; } /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); /* * power of 2 part of the processes collection: only processes [0..ip2) * are working; some of them (mydist >> (k+1) == 0) either send or re- * ceive U. At every step k, k is in [0 .. hdim), of the algorithm, a * process pair that exchanges U is such that (mydist >> (k+1) == 0). * Among those processes, the ones that are sending U are such that * mydist >> k == 0. */ if( mydist < ip2 ) { k = 0; while( k < hdim ) { partner = (int)(mydist ^ ipow); partner = MModAdd( icurrow, partner, nprow ); /* * Exchange and combine the local results - If I receive U, then I must * copy from U the rows that belong to my piece of A, and then update U * by copying in it the rows I have accumulated in W. Otherwise, I re- * ceive W. In this later case, and I have U, I shall update my copy of * U by copying in it the rows I have accumulated in W. 
If I did not * have U before, I simply need to update my pointer in W for later use. */ if( ( mydist >> (unsigned int)( k + 1 ) ) == 0 ) { if( ( mydist >> (unsigned int)(k) ) == 0 ) { (void) HPL_sdrv( U, usize, Cmsgid, Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, Cmsgid, partner, comm ); HPL_dlaswp03T( llen[partner], n, U, LDU, Mptr( W, 0, ipW, ldW ), Mptr( W, 1, ipW, ldW ), ldW ); ipW += llen[partner]; } else { (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, U, usize, Cmsgid, partner, comm ); HPL_dlaswp04T( ipA, llen[myrow], n, U, LDU, A, lda, W, W+1, ldW, lindxA, lindxAU ); } } else { (void) HPL_sdrv( W, llen[myrow]*ldW, Cmsgid, Mptr( W, 0, ipW, ldW ), llen[partner]*ldW, Cmsgid, partner, comm ); ipW += llen[partner]; } /* * Update llen - Go to next process pairs */ iprow = icurrow; ipdist = 0; do { if( (unsigned int)( partner = (int)(ipdist ^ ipow) ) > ipdist ) { partner = MModAdd( icurrow, partner, nprow ); llen[iprow] += llen[partner]; llen[partner] = llen[iprow]; } iprow = MModAdd( iprow, 1, nprow ); ipdist++; } while( ipdist < ip2 ); ipow <<= 1; k++; /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); } } else { /* * non power of 2 part of the process collection: proc[ip2] broadcast U * to procs[ip2..nprow) (relatively to icurrow). 
*/ if( size_ > 1 ) { k = size_ - 1; while( k > 1 ) { k >>= 1; ip2_ <<= 1; mask <<= 1; mask++; } root = MModAdd( icurrow, (int)(ip2), nprow ); mydis_ = (unsigned int)MModSub( myrow, root, nprow ); do { mask ^= ip2_; if( ( mydis_ & mask ) == 0 ) { partner = (int)(mydis_ ^ ip2_); if( ( mydis_ & ip2_ ) != 0 ) { (void) HPL_recv( U, usize, MModAdd( root, partner, nprow ), Cmsgid, comm ); } else if( partner < size_ ) { (void) HPL_send( U, usize, MModAdd( root, partner, nprow ), Cmsgid, comm ); } } ip2_ >>= 1; /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); } while( ip2_ > 0 ); } /* * Every process in [ip2..nprow) (relatively to icurrow) grabs its piece * of A. */ HPL_dlaswp05T( ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } /* * If nprow is not a power of 2, proc[i-ip2] sends global result to * proc[i] for all i in [ip2..nprow); */ if( ( Np2 != 0 ) && ( ( partner = (int)(mydist ^ ip2) ) < nprow ) ) { partner = MModAdd( icurrow, partner, nprow ); if( ( mydist & ip2 ) != 0 ) { (void) HPL_recv( U, usize, partner, Cmsgid, comm ); } else { (void) HPL_send( U, usize, partner, Cmsgid, comm ); } } if( vptr ) free( vptr ); /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * End of HPL_pdlaswp00T */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdlaswp01N.c0000644000000000000000000002151011256503657014630 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdlaswp01N ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN ) #else void HPL_pdlaswp01N ( PBCST, IFLAG, PANEL, NN ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const int NN; #endif { /* * Purpose * ======= * * HPL_pdlaswp01N applies the NB row interchanges to NN columns of the * trailing submatrix and broadcast a column panel. * * A "Spread then roll" algorithm performs the swap :: broadcast of the * row panel U at once, resulting in a minimal communication volume and * a "very good" use of the connectivity if available. With P process * rows and assuming bi-directional links, the running time of this * function can be approximated by: * * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth * * where NB is the number of rows of the row panel U, N is the global * number of columns being updated, lat and bdwth are the latency and * bandwidth of the network for double precision real words. K is * a constant in (2,3] that depends on the achieved bandwidth during a * simultaneous message exchange between two processes. An empirical * optimistic value of K is typically 2.4. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. * * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * NN (local input) const int * On entry, NN specifies the local number of columns of the * trailing submatrix to be swapped and broadcast starting at * the current position. NN must be at least zero. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * U; int * ipID, * iplen, * ipmap, * ipmapm1, * iwork, * lindxA = NULL, * lindxAU, * permU; static int equil=-1; int icurrow, * iflag, * ipA, * ipl, jb, k, lda, myrow, n, nprow; #define LDU jb /* .. * .. Executable Statements .. */ n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; /* * Quick return if there is nothing to do */ if( ( n <= 0 ) || ( jb <= 0 ) ) return; #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * Decide whether equilibration should be performed or not */ if( equil == -1 ) equil = PANEL->algo->equil; /* * Retrieve parameters from the PANEL data structure */ nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; lda = PANEL->lda; icurrow = PANEL->prow; /* * Compute ipID (if not already done for this panel). lindxA and lindxAU * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 * are of size nprow, permU is of length jb, and this function needs a * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) * i.e. 
4 + 9*jb + 3*nprow + max(2*jb, nprow+1); */ k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; if( *iflag == -1 ) /* no index arrays have been computed so far */ { HPL_pipid( PANEL, ipl, ipID ); HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, ipmap, ipmapm1, permU, iwork ); *iflag = 1; } else if( *iflag == 0 ) /* HPL_pdlaswp00N called before: reuse ipID */ { HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, ipmap, ipmapm1, permU, iwork ); *iflag = 1; } else if( ( *iflag == 1 ) && ( equil != 0 ) ) { /* HPL_pdlaswp01N was call before only re-compute IPLEN, IPMAP */ HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); *iflag = 1; } /* * Copy into U the rows to be spread (local to icurrow) */ if( myrow == icurrow ) { HPL_dlaswp01N( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } /* * Spread U - optionally probe for column panel */ HPL_spreadN( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, ipmap, ipmapm1 ); /* * Local exchange (everywhere but in process row icurrow) */ if( myrow != icurrow ) { k = ipmapm1[myrow]; HPL_dlaswp06N( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, iplen[k], 0, LDU ), LDU, lindxA ); } /* * Equilibration */ if( equil != 0 ) HPL_equil( PBCST, IFLAG, PANEL, HplNoTrans, n, U, LDU, iplen, ipmap, ipmapm1, iwork ); /* * Rolling phase */ HPL_rollN( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); /* * Permute U in every process row */ HPL_dlaswp00N( jb, n, U, LDU, permU ); #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * End of HPL_pdlaswp01N */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdlaswp01T.c0000644000000000000000000002150511256503657014642 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdlaswp01T ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN ) #else void HPL_pdlaswp01T ( PBCST, IFLAG, PANEL, NN ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const int NN; #endif { /* * Purpose * ======= * * HPL_pdlaswp01T applies the NB row interchanges to NN columns of the * trailing submatrix and broadcast a column panel. * * A "Spread then roll" algorithm performs the swap :: broadcast of the * row panel U at once, resulting in a minimal communication volume and * a "very good" use of the connectivity if available. With P process * rows and assuming bi-directional links, the running time of this * function can be approximated by: * * (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth * * where NB is the number of rows of the row panel U, N is the global * number of columns being updated, lat and bdwth are the latency and * bandwidth of the network for double precision real words. K is * a constant in (2,3] that depends on the achieved bandwidth during a * simultaneous message exchange between two processes. An empirical * optimistic value of K is typically 2.4. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. * * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * NN (local input) const int * On entry, NN specifies the local number of columns of the * trailing submatrix to be swapped and broadcast starting at * the current position. NN must be at least zero. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double * A, * U; int * ipID, * iplen, * ipmap, * ipmapm1, * iwork, * lindxA = NULL, * lindxAU, * permU; static int equil=-1; int icurrow, * iflag, * ipA, * ipl, jb, k, lda, myrow, n, nprow; #define LDU n /* .. * .. Executable Statements .. */ n = PANEL->n; n = Mmin( NN, n ); jb = PANEL->jb; /* * Quick return if there is nothing to do */ if( ( n <= 0 ) || ( jb <= 0 ) ) return; #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * Decide whether equilibration should be performed or not */ if( equil == -1 ) equil = PANEL->algo->equil; /* * Retrieve parameters from the PANEL data structure */ nprow = PANEL->grid->nprow; myrow = PANEL->grid->myrow; A = PANEL->A; U = PANEL->U; iflag = PANEL->IWORK; lda = PANEL->lda; icurrow = PANEL->prow; /* * Compute ipID (if not already done for this panel). lindxA and lindxAU * are of length at most 2*jb - iplen is of size nprow+1, ipmap, ipmapm1 * are of size nprow, permU is of length jb, and this function needs a * workspace of size max( 2 * jb (plindx1), nprow+1(equil)): * 1(iflag) + 1(ipl) + 1(ipA) + 9*jb + 3*nprow + 1 + MAX(2*jb,nprow+1) * i.e. 
4 + 9*jb + 3*nprow + max(2*jb, nprow+1); */ k = (int)((unsigned int)(jb) << 1); ipl = iflag + 1; ipID = ipl + 1; ipA = ipID + ((unsigned int)(k) << 1); lindxA = ipA + 1; lindxAU = lindxA + k; iplen = lindxAU + k; ipmap = iplen + nprow + 1; ipmapm1 = ipmap + nprow; permU = ipmapm1 + nprow; iwork = permU + jb; if( *iflag == -1 ) /* no index arrays have been computed so far */ { HPL_pipid( PANEL, ipl, ipID ); HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, ipmap, ipmapm1, permU, iwork ); *iflag = 1; } else if( *iflag == 0 ) /* HPL_pdlaswp00T called before: reuse ipID */ { HPL_plindx1( PANEL, *ipl, ipID, ipA, lindxA, lindxAU, iplen, ipmap, ipmapm1, permU, iwork ); *iflag = 1; } else if( ( *iflag == 1 ) && ( equil != 0 ) ) { /* HPL_pdlaswp01T was call before only re-compute IPLEN, IPMAP */ HPL_plindx10( PANEL, *ipl, ipID, iplen, ipmap, ipmapm1 ); *iflag = 1; } /* * Copy into U the rows to be spread (local to icurrow) */ if( myrow == icurrow ) { HPL_dlaswp01T( *ipA, n, A, lda, U, LDU, lindxA, lindxAU ); } /* * Spread U - optionally probe for column panel */ HPL_spreadT( PBCST, IFLAG, PANEL, HplRight, n, U, LDU, 0, iplen, ipmap, ipmapm1 ); /* * Local exchange (everywhere but in process row icurrow) */ if( myrow != icurrow ) { k = ipmapm1[myrow]; HPL_dlaswp06T( iplen[k+1]-iplen[k], n, A, lda, Mptr( U, 0, iplen[k], LDU ), LDU, lindxA ); } /* * Equilibration */ if( equil != 0 ) HPL_equil( PBCST, IFLAG, PANEL, HplTrans, n, U, LDU, iplen, ipmap, ipmapm1, iwork ); /* * Rolling phase */ HPL_rollT( PBCST, IFLAG, PANEL, n, U, LDU, iplen, ipmap, ipmapm1 ); /* * Permute U in every process row */ HPL_dlaswp10N( n, jb, U, LDU, permU ); #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_LASWP ); #endif /* * End of HPL_pdlaswp01T */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdtrsv.c0000644000000000000000000002672011256503657014231 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdtrsv ( HPL_T_grid * GRID, HPL_T_pmat * AMAT ) #else void HPL_pdtrsv ( GRID, AMAT ) HPL_T_grid * GRID; HPL_T_pmat * AMAT; #endif { /* * Purpose * ======= * * HPL_pdtrsv solves an upper triangular system of linear equations. * * The rhs is the last column of the N by N+1 matrix A. The solve starts * in the process column owning the Nth column of A, so the rhs b may * need to be moved one process column to the left at the beginning. The * routine therefore needs a column vector in every process column but * the one owning b. The result is replicated in all process rows, and * returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. * * The algorithm uses decreasing one-ring broadcast in process rows and * columns implemented in terms of synchronous communication point to * point primitives. The lookahead of depth 1 is used to minimize the * critical path. This entire operation is essentially ``latency'' bound * and an estimate of its running time is given by: * * (move rhs) lat + N / ( P bdwth ) + * (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + * gam2 N^2 / ( P Q ), * * where gam2 is an estimate of the Level 2 BLAS rate of execution. * There are N / NB diagonal blocks. One must exchange 2 messages of * length NB to compute the next NB entries of the vector solution, as * well as performing a total of N^2 floating point operations. * * Arguments * ========= * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * AMAT (local input/output) HPL_T_pmat * * On entry, AMAT points to the data structure containing the * local array information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ MPI_Comm Ccomm, Rcomm; double * A=NULL, * Aprev=NULL, * Aptr, * XC=NULL, * XR=NULL, * Xd=NULL, * Xdprev=NULL, * W=NULL; int Alcol, Alrow, Anpprev, Anp, Anq, Bcol, Cmsgid, GridIsNotPx1, GridIsNot1xQ, Rmsgid, Wfr=0, colprev, kb, kbprev, lda, mycol, myrow, n, n1, n1p, n1pprev=0, nb, npcol, nprow, rowprev, tmp1, tmp2; /* .. * .. Executable Statements .. */ #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PTRSV ); #endif if( ( n = AMAT->n ) <= 0 ) return; nb = AMAT->nb; lda = AMAT->ld; A = AMAT->A; XR = AMAT->X; (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); Rcomm = GRID->row_comm; Rmsgid = MSGID_BEGIN_PTRSV; Ccomm = GRID->col_comm; Cmsgid = MSGID_BEGIN_PTRSV + 1; GridIsNot1xQ = ( nprow > 1 ); GridIsNotPx1 = ( npcol > 1 ); /* * Move the rhs in the process column owning the last column of A. */ Mnumroc( Anp, n, nb, nb, myrow, 0, nprow ); Mnumroc( Anq, n, nb, nb, mycol, 0, npcol ); tmp1 = ( n - 1 ) / nb; Alrow = tmp1 - ( tmp1 / nprow ) * nprow; Alcol = tmp1 - ( tmp1 / npcol ) * npcol; kb = n - tmp1 * nb; Aptr = (double *)(A); XC = Mptr( Aptr, 0, Anq, lda ); Mindxg2p( n, nb, nb, Bcol, 0, npcol ); if( ( Anp > 0 ) && ( Alcol != Bcol ) ) { if( mycol == Bcol ) { (void) HPL_send( XC, Anp, Alcol, Rmsgid, Rcomm ); } else if( mycol == Alcol ) { (void) HPL_recv( XC, Anp, Bcol, Rmsgid, Rcomm ); } } Rmsgid = ( Rmsgid + 2 > MSGID_END_PTRSV ? 
MSGID_BEGIN_PTRSV : Rmsgid + 2 ); if( mycol != Alcol ) { for( tmp1=0; tmp1 < Anp; tmp1++ ) XC[tmp1] = HPL_rzero; } /* * Set up lookahead */ n1 = ( npcol - 1 ) * nb; n1 = Mmax( n1, nb ); if( Anp > 0 ) { W = (double*)malloc( (size_t)(Mmin( n1, Anp )) * sizeof( double ) ); if( W == NULL ) { HPL_pabort( __LINE__, "HPL_pdtrsv", "Memory allocation failed" ); } Wfr = 1; } Anpprev = Anp; Xdprev = XR; Aprev = Aptr = Mptr( Aptr, 0, Anq, lda ); tmp1 = n - kb; tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); MnumrocI( n1pprev, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); if( myrow == Alrow ) { Anpprev = ( Anp -= kb ); } if( mycol == Alcol ) { Aprev = ( Aptr -= lda * kb ); Anq -= kb; Xdprev = ( Xd = XR + Anq ); if( myrow == Alrow ) { HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, kb, Aptr+Anp, lda, XC+Anp, 1 ); HPL_dcopy( kb, XC+Anp, 1, Xd, 1 ); } } rowprev = Alrow; Alrow = MModSub1( Alrow, nprow ); colprev = Alcol; Alcol = MModSub1( Alcol, npcol ); kbprev = kb; n -= kb; tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); /* * Start the operations */ while( n > 0 ) { if( mycol == Alcol ) { Aptr -= lda * kb; Anq -= kb; Xd = XR + Anq; } if( myrow == Alrow ) { Anp -= kb; } /* * Broadcast (decreasing-ring) of previous solution block in previous * process column, compute partial update of current block and send it * to current process column. 
*/ if( mycol == colprev ) { /* * Send previous solution block in process row above */ if( myrow == rowprev ) { if( GridIsNot1xQ ) (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), Cmsgid, Ccomm ); } else { (void) HPL_recv( Xdprev, kbprev, MModAdd1( myrow, nprow ), Cmsgid, Ccomm ); } /* * Compute partial update of previous solution block and send it to cur- * rent column */ if( n1pprev > 0 ) { tmp1 = Anpprev - n1pprev; HPL_dgemv( HplColumnMajor, HplNoTrans, n1pprev, kbprev, -HPL_rone, Aprev+tmp1, lda, Xdprev, 1, HPL_rone, XC+tmp1, 1 ); if( GridIsNotPx1 ) (void) HPL_send( XC+tmp1, n1pprev, Alcol, Rmsgid, Rcomm ); } /* * Finish the (decreasing-ring) broadcast of the solution block in pre- * vious process column */ if( ( myrow != rowprev ) && ( myrow != MModAdd1( rowprev, nprow ) ) ) (void) HPL_send( Xdprev, kbprev, MModSub1( myrow, nprow ), Cmsgid, Ccomm ); } else if( mycol == Alcol ) { /* * Current column receives and accumulates partial update of previous * solution block */ if( n1pprev > 0 ) { (void) HPL_recv( W, n1pprev, colprev, Rmsgid, Rcomm ); HPL_daxpy( n1pprev, HPL_rone, W, 1, XC+Anpprev-n1pprev, 1 ); } } /* * Solve current diagonal block */ if( ( mycol == Alcol ) && ( myrow == Alrow ) ) { HPL_dtrsv( HplColumnMajor, HplUpper, HplNoTrans, HplNonUnit, kb, Aptr+Anp, lda, XC+Anp, 1 ); HPL_dcopy( kb, XC+Anp, 1, XR+Anq, 1 ); } /* * Finish previous update */ if( ( mycol == colprev ) && ( ( tmp1 = Anpprev - n1pprev ) > 0 ) ) HPL_dgemv( HplColumnMajor, HplNoTrans, tmp1, kbprev, -HPL_rone, Aprev, lda, Xdprev, 1, HPL_rone, XC, 1 ); /* * Save info of current step and update info for the next step */ if( mycol == Alcol ) { Xdprev = Xd; Aprev = Aptr; } if( myrow == Alrow ) { Anpprev -= kb; } rowprev = Alrow; colprev = Alcol; n1pprev = n1p; kbprev = kb; n -= kb; Alrow = MModSub1( Alrow, nprow ); Alcol = MModSub1( Alcol, npcol ); tmp1 = n - ( kb = nb ); tmp1 -= ( tmp2 = Mmin( tmp1, n1 ) ); MnumrocI( n1p, tmp2, Mmax( 0, tmp1 ), nb, nb, myrow, 0, nprow ); Rmsgid = ( 
Rmsgid+2 > MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV : Rmsgid+2 ); Cmsgid = ( Cmsgid+2 > MSGID_END_PTRSV ? MSGID_BEGIN_PTRSV+1 : Cmsgid+2 ); } /* * Replicate last solution block */ if( mycol == colprev ) (void) HPL_broadcast( (void *)(XR), kbprev, HPL_DOUBLE, rowprev, Ccomm ); if( Wfr ) free( W ); #ifdef HPL_DETAILED_TIMING HPL_ptimer( HPL_TIMING_PTRSV ); #endif /* * End of HPL_pdtrsv */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pdupdateNN.c0000644000000000000000000003624211256503657014751 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdupdateNN
(
   HPL_T_panel * PBCST,
   int * IFLAG,
   HPL_T_panel * PANEL,
   const int NN
)
#else
void HPL_pdupdateNN
( PBCST, IFLAG, PANEL, NN )
   HPL_T_panel * PBCST;
   int * IFLAG;
   HPL_T_panel * PANEL;
   const int NN;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdupdateNN applies the row interchanges and the update held in
 * PANEL to (part of) the local trailing submatrix while driving the
 * broadcast of the panel PBCST.  The columns are processed in slices
 * of at most PANEL->nb columns and HPL_bcast is called on PBCST after
 * each slice for as long as the probe flag stays HPL_KEEP_TESTING; the
 * columns that remain afterwards are updated in a single step.
 *
 * In this variant L1 is applied as a unit lower triangular, non
 * transposed factor, and when there is more than one process row U is
 * addressed as a jb-by-n column-major array (LDU is jb).
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         Panel to be broadcast.  May be NULL, in which case IFLAG is
 *         never written.
 *         NOTE(review): past the early exit HPL_bcast is still called
 *         with a possibly NULL PBCST, so it presumably accepts NULL
 *         and reports completion - confirm in HPL_bcast.
 *
 * IFLAG   (local output)                int *
 *         Written only when PBCST is not NULL.  On the early exit path
 *         (nothing to update) the broadcast is repeated until it
 *         returns HPL_SUCCESS in IFLAG.  Otherwise IFLAG receives the
 *         final value of the local probe flag.  When PBCST is NULL,
 *         IFLAG is left unchanged.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         Panel describing the update.  Unless the early exit is
 *         taken, on exit PANEL->A and PANEL->jj have been advanced by
 *         the number of updated columns and PANEL->nq has been
 *         decreased by the same amount.
 *
 * NN      (local input)                 const int
 *         Requested number of local columns to update, starting at the
 *         current position.  The number actually updated is
 *         min( NN, PANEL->nq ); a negative NN selects all PANEL->nq
 *         columns.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
   int * ipiv;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
#endif
   int curr, i, iroff, jb, lda, ldl2, mp, n, nb, nq0, nn, test;
/*
 * Swap algorithm and threshold are cached across calls (see below).
 */
   static int tswap = 0;
   static HPL_T_SWAP fswap = HPL_NO_SWP;
/*
 * Leading dimension used for U.  The macro is not undefined at the end
 * of the function.
 */
#define LDU jb
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
/*
 * NOTE(review): HPL_ptimer is called with the same id on entry and on
 * every exit path; presumably each call toggles the timer - confirm.
 */
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
   nb = PANEL->nb;
   jb = PANEL->jb;
   n = PANEL->nq;
   lda = PANEL->lda;
/*
 * A negative NN keeps n equal to PANEL->nq (update everything left).
 */
   if( NN >= 0 ) n = Mmin( NN, n );
/*
 * There is nothing to update: only enforce the panel broadcast by
 * calling HPL_bcast until it reports HPL_SUCCESS.  PANEL is not
 * modified on this path.
 */
   if( ( n <= 0 ) || ( jb <= 0 ) )
   {
      if( PBCST != NULL )
      {
         do { (void) HPL_bcast( PBCST, IFLAG ); }
         while( *IFLAG != HPL_SUCCESS );
      }
#ifdef HPL_DETAILED_TIMING
      HPL_ptimer( HPL_TIMING_UPDATE );
#endif
      return;
   }
/*
 * Enable/disable the column panel probing mechanism: the first probe
 * initializes test, which controls the slicing loops below.
 */
   (void) HPL_bcast( PBCST, &test );
/*
 * 1 x Q case: a single process row, rows are swapped locally and the
 * row block of U is computed in place in A.
 */
   if( PANEL->grid->nprow == 1 )
   {
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      ldl2 = PANEL->ldl2;
      dpiv = PANEL->DPIV;
      ipiv = PANEL->IWORK;
/*
 * Rows below the first jb ones are the ones updated by the dgemm.
 */
      mp = PANEL->mp - jb;
      iroff = PANEL->ii;
      nq0 = 0;
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * DPIV stores the jb pivot entries as doubles; convert them to int and
 * make them relative to the local row offset PANEL->ii.  The result is
 * stored in PANEL->IWORK and handed to HPL_dlaswp00N.
 */
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
/*
 * So far we have not updated anything - test availability of the panel
 * to be forwarded - If detected forward it and finish the update in one
 * step.  Once nq0 has reached n, nn is zero and the loop only keeps
 * probing the broadcast (the compute calls are issued with zero
 * columns).
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Update nb columns at a time: swap rows, solve with L1 for the row
 * block of U (in place in the first jb rows of A), then update the mp
 * rows below.
 */
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
/*
 * With the usual BLAS argument order: rows jb.. of the slice are
 * updated with minus L2 times the first jb rows of the slice.
 */
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Move to the next slice and probe the broadcast again.
 */
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
   else                        /* nprow > 1 ... */
   {
/*
 * Selection of the swapping algorithm - swap:broadcast U.  fswap and
 * tswap are function-static: they are read from PANEL->algo the first
 * time this branch runs and reused by every later call.
 * NOTE(review): this assumes the swap algorithm and threshold never
 * change between calls within one process - confirm.
 */
      if( fswap == HPL_NO_SWP )
      {
         fswap = PANEL->algo->fswap;
         tswap = PANEL->algo->fsthr;
      }
/*
 * HPL_SW_MIX picks the 01 variant only above the column threshold.
 * Both routines receive PBCST and the probe flag as well.
 */
      if( ( fswap == HPL_SWAP01 ) || ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
      {
         HPL_pdlaswp01N( PBCST, &test, PANEL, n );
      }
      else
      {
         HPL_pdlaswp00N( PBCST, &test, PANEL, n );
      }
/*
 * Compute redundantly row block of U and update trailing submatrix.
 * curr is nonzero on the process row owning the current panel; only
 * there are the first jb local rows excluded from the dgemm update
 * and overwritten with U afterwards.
 */
      nq0 = 0;
      curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      Uptr = PANEL->U;
      ldl2 = PANEL->ldl2;
      mp = PANEL->mp - ( curr != 0 ? jb : 0 );
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
      Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * Broadcast has not occurred yet, splitting the computational part in
 * slices of at most nb columns with a broadcast probe after each one.
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Solve with L1 in place in the jb-by-nn slice of U.
 */
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Store the jb-by-nn slice of U into the first jb rows of A
 * (presumably a plain copy, going by the HPL_dlacpy argument order).
 */
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
/*
 * U advances by nn columns, A by nn columns; then probe again.
 */
         Uptr = Mptr( Uptr, 0, nn, LDU );
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Uv0 );
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
/*
 * Advance the panel past the n columns that were just updated.
 */
   PANEL->A = Mptr( PANEL->A, 0, n, lda );
   PANEL->nq -= n;
   PANEL->jj += n;
/*
 * return the outcome of the probe (should always be HPL_SUCCESS, the
 * panel broadcast is enforced in that routine).  IFLAG is not touched
 * when PBCST is NULL.
 */
   if( PBCST != NULL ) *IFLAG = test;
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
/*
 * End of HPL_pdupdateNN
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_pdupdateNT.c0000644000000000000000000003623211256503657014756 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdupdateNT
(
   HPL_T_panel * PBCST,
   int * IFLAG,
   HPL_T_panel * PANEL,
   const int NN
)
#else
void HPL_pdupdateNT
( PBCST, IFLAG, PANEL, NN )
   HPL_T_panel * PBCST;
   int * IFLAG;
   HPL_T_panel * PANEL;
   const int NN;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdupdateNT applies the row interchanges and the update held in
 * PANEL to (part of) the local trailing submatrix while driving the
 * broadcast of the panel PBCST.  The columns are processed in slices
 * of at most PANEL->nb columns and HPL_bcast is called on PBCST after
 * each slice for as long as the probe flag stays HPL_KEEP_TESTING; the
 * columns that remain afterwards are updated in a single step.
 *
 * In this variant L1 is applied as a unit lower triangular factor and,
 * when there is more than one process row, U is addressed in
 * transposed form as an n-by-jb column-major array (LDU is n): the
 * triangular solve is applied from the right with the transpose of L1
 * and the dgemm uses the transpose of U.
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         Panel to be broadcast.  May be NULL, in which case IFLAG is
 *         never written.
 *         NOTE(review): past the early exit HPL_bcast is still called
 *         with a possibly NULL PBCST, so it presumably accepts NULL
 *         and reports completion - confirm in HPL_bcast.
 *
 * IFLAG   (local output)                int *
 *         Written only when PBCST is not NULL.  On the early exit path
 *         (nothing to update) the broadcast is repeated until it
 *         returns HPL_SUCCESS in IFLAG.  Otherwise IFLAG receives the
 *         final value of the local probe flag.  When PBCST is NULL,
 *         IFLAG is left unchanged.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         Panel describing the update.  Unless the early exit is
 *         taken, on exit PANEL->A and PANEL->jj have been advanced by
 *         the number of updated columns and PANEL->nq has been
 *         decreased by the same amount.
 *
 * NN      (local input)                 const int
 *         Requested number of local columns to update, starting at the
 *         current position.  The number actually updated is
 *         min( NN, PANEL->nq ); a negative NN selects all PANEL->nq
 *         columns.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
   int * ipiv;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
#endif
   int curr, i, iroff, jb, lda, ldl2, mp, n, nb, nq0, nn, test;
/*
 * Swap algorithm and threshold are cached across calls (see below).
 */
   static int tswap = 0;
   static HPL_T_SWAP fswap = HPL_NO_SWP;
/*
 * Leading dimension used for U: the local variable n, i.e. the number
 * of columns updated by this call.  The macro is not undefined at the
 * end of the function.
 */
#define LDU n
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
/*
 * NOTE(review): HPL_ptimer is called with the same id on entry and on
 * every exit path; presumably each call toggles the timer - confirm.
 */
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
   nb = PANEL->nb;
   jb = PANEL->jb;
   n = PANEL->nq;
   lda = PANEL->lda;
/*
 * A negative NN keeps n equal to PANEL->nq (update everything left).
 */
   if( NN >= 0 ) n = Mmin( NN, n );
/*
 * There is nothing to update: only enforce the panel broadcast by
 * calling HPL_bcast until it reports HPL_SUCCESS.  PANEL is not
 * modified on this path.
 */
   if( ( n <= 0 ) || ( jb <= 0 ) )
   {
      if( PBCST != NULL )
      {
         do { (void) HPL_bcast( PBCST, IFLAG ); }
         while( *IFLAG != HPL_SUCCESS );
      }
#ifdef HPL_DETAILED_TIMING
      HPL_ptimer( HPL_TIMING_UPDATE );
#endif
      return;
   }
/*
 * Enable/disable the column panel probing mechanism: the first probe
 * initializes test, which controls the slicing loops below.
 */
   (void) HPL_bcast( PBCST, &test );
/*
 * 1 x Q case: a single process row, rows are swapped locally and the
 * row block of U is computed in place in A (U is not used here, so
 * this branch does not depend on the storage of U).
 */
   if( PANEL->grid->nprow == 1 )
   {
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      ldl2 = PANEL->ldl2;
      dpiv = PANEL->DPIV;
      ipiv = PANEL->IWORK;
/*
 * Rows below the first jb ones are the ones updated by the dgemm.
 */
      mp = PANEL->mp - jb;
      iroff = PANEL->ii;
      nq0 = 0;
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * DPIV stores the jb pivot entries as doubles; convert them to int and
 * make them relative to the local row offset PANEL->ii.  The result is
 * stored in PANEL->IWORK and handed to HPL_dlaswp00N.
 */
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
/*
 * So far we have not updated anything - test availability of the panel
 * to be forwarded - If detected forward it and finish the update in one
 * step.  Once nq0 has reached n, nn is zero and the loop only keeps
 * probing the broadcast (the compute calls are issued with zero
 * columns).
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Update nb columns at a time: swap rows, solve with L1 for the row
 * block of U (in place in the first jb rows of A), then update the mp
 * rows below.
 */
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
/*
 * With the usual BLAS argument order: rows jb.. of the slice are
 * updated with minus L2 times the first jb rows of the slice.
 */
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Move to the next slice and probe the broadcast again.
 */
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplLower, HplNoTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
   else                        /* nprow > 1 ... */
   {
/*
 * Selection of the swapping algorithm - swap:broadcast U.  fswap and
 * tswap are function-static: they are read from PANEL->algo the first
 * time this branch runs and reused by every later call.
 * NOTE(review): this assumes the swap algorithm and threshold never
 * change between calls within one process - confirm.
 */
      if( fswap == HPL_NO_SWP )
      {
         fswap = PANEL->algo->fswap;
         tswap = PANEL->algo->fsthr;
      }
/*
 * HPL_SW_MIX picks the 01 variant only above the column threshold.
 * The T-suffixed swap routines are used here, matching the transposed
 * addressing of U below.  Both receive PBCST and the probe flag.
 */
      if( ( fswap == HPL_SWAP01 ) || ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
      {
         HPL_pdlaswp01T( PBCST, &test, PANEL, n );
      }
      else
      {
         HPL_pdlaswp00T( PBCST, &test, PANEL, n );
      }
/*
 * Compute redundantly row block of U and update trailing submatrix.
 * curr is nonzero on the process row owning the current panel; only
 * there are the first jb local rows excluded from the dgemm update
 * and overwritten with U afterwards.
 */
      nq0 = 0;
      curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      Uptr = PANEL->U;
      ldl2 = PANEL->ldl2;
      mp = PANEL->mp - ( curr != 0 ? jb : 0 );
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE );
/*
 * Create the matrix views (U is bound as an n-by-jb view).
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
      Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * Broadcast has not occurred yet, splitting the computational part in
 * slices of at most nb columns with a broadcast probe after each one.
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Solve from the right with the transpose of L1, in place in the
 * nn-by-jb slice of the transposed U.
 */
         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Store the slice of U into the first jb rows of A (presumably a
 * transposing copy, going by the HPL_dlatcpy name and arguments).
 */
            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
/*
 * The transposed U advances by nn rows, A by nn columns; then probe
 * again.
 */
         Uptr = Mptr( Uptr, nn, 0, LDU );
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
         HPL_dtrsm( HplColumnMajor, HplRight, HplLower, HplTrans, HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Uv0 );
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
/*
 * Advance the panel past the n columns that were just updated.
 */
   PANEL->A = Mptr( PANEL->A, 0, n, lda );
   PANEL->nq -= n;
   PANEL->jj += n;
/*
 * return the outcome of the probe (should always be HPL_SUCCESS, the
 * panel broadcast is enforced in that routine).  IFLAG is not touched
 * when PBCST is NULL.
 */
   if( PBCST != NULL ) *IFLAG = test;
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
/*
 * End of HPL_pdupdateNT
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_pdupdateTN.c0000644000000000000000000003624011256503657014755 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdupdateTN
(
   HPL_T_panel * PBCST,
   int * IFLAG,
   HPL_T_panel * PANEL,
   const int NN
)
#else
void HPL_pdupdateTN
( PBCST, IFLAG, PANEL, NN )
   HPL_T_panel * PBCST;
   int * IFLAG;
   HPL_T_panel * PANEL;
   const int NN;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdupdateTN applies the row interchanges and the update held in
 * PANEL to (part of) the local trailing submatrix while driving the
 * broadcast of the panel PBCST.  The columns are processed in slices
 * of at most PANEL->nb columns and HPL_bcast is called on PBCST after
 * each slice for as long as the probe flag stays HPL_KEEP_TESTING; the
 * columns that remain afterwards are updated in a single step.
 *
 * In this variant L1 is applied in transposed form (the triangular
 * solves use the transpose of a unit upper triangular array) and, when
 * there is more than one process row, U is addressed as a jb-by-n
 * column-major array (LDU is jb).
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         Panel to be broadcast.  May be NULL, in which case IFLAG is
 *         never written.
 *         NOTE(review): past the early exit HPL_bcast is still called
 *         with a possibly NULL PBCST, so it presumably accepts NULL
 *         and reports completion - confirm in HPL_bcast.
 *
 * IFLAG   (local output)                int *
 *         Written only when PBCST is not NULL.  On the early exit path
 *         (nothing to update) the broadcast is repeated until it
 *         returns HPL_SUCCESS in IFLAG.  Otherwise IFLAG receives the
 *         final value of the local probe flag.  When PBCST is NULL,
 *         IFLAG is left unchanged.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         Panel describing the update.  Unless the early exit is
 *         taken, on exit PANEL->A and PANEL->jj have been advanced by
 *         the number of updated columns and PANEL->nq has been
 *         decreased by the same amount.
 *
 * NN      (local input)                 const int
 *         Requested number of local columns to update, starting at the
 *         current position.  The number actually updated is
 *         min( NN, PANEL->nq ); a negative NN selects all PANEL->nq
 *         columns.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
   int * ipiv;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
#endif
   int curr, i, iroff, jb, lda, ldl2, mp, n, nb, nq0, nn, test;
/*
 * Swap algorithm and threshold are cached across calls (see below).
 */
   static int tswap = 0;
   static HPL_T_SWAP fswap = HPL_NO_SWP;
/*
 * Leading dimension used for U.  The macro is not undefined at the end
 * of the function.
 */
#define LDU jb
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
/*
 * NOTE(review): HPL_ptimer is called with the same id on entry and on
 * every exit path; presumably each call toggles the timer - confirm.
 */
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
   nb = PANEL->nb;
   jb = PANEL->jb;
   n = PANEL->nq;
   lda = PANEL->lda;
/*
 * A negative NN keeps n equal to PANEL->nq (update everything left).
 */
   if( NN >= 0 ) n = Mmin( NN, n );
/*
 * There is nothing to update: only enforce the panel broadcast by
 * calling HPL_bcast until it reports HPL_SUCCESS.  PANEL is not
 * modified on this path.
 */
   if( ( n <= 0 ) || ( jb <= 0 ) )
   {
      if( PBCST != NULL )
      {
         do { (void) HPL_bcast( PBCST, IFLAG ); }
         while( *IFLAG != HPL_SUCCESS );
      }
#ifdef HPL_DETAILED_TIMING
      HPL_ptimer( HPL_TIMING_UPDATE );
#endif
      return;
   }
/*
 * Enable/disable the column panel probing mechanism: the first probe
 * initializes test, which controls the slicing loops below.
 */
   (void) HPL_bcast( PBCST, &test );
/*
 * 1 x Q case: a single process row, rows are swapped locally and the
 * row block of U is computed in place in A.
 */
   if( PANEL->grid->nprow == 1 )
   {
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      ldl2 = PANEL->ldl2;
      dpiv = PANEL->DPIV;
      ipiv = PANEL->IWORK;
/*
 * Rows below the first jb ones are the ones updated by the dgemm.
 */
      mp = PANEL->mp - jb;
      iroff = PANEL->ii;
      nq0 = 0;
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * DPIV stores the jb pivot entries as doubles; convert them to int and
 * make them relative to the local row offset PANEL->ii.  The result is
 * stored in PANEL->IWORK and handed to HPL_dlaswp00N.
 */
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
/*
 * So far we have not updated anything - test availability of the panel
 * to be forwarded - If detected forward it and finish the update in one
 * step.  Once nq0 has reached n, nn is zero and the loop only keeps
 * probing the broadcast (the compute calls are issued with zero
 * columns).
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Update nb columns at a time: swap rows, solve with the transpose of
 * L1 for the row block of U (in place in the first jb rows of A), then
 * update the mp rows below.
 */
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
/*
 * With the usual BLAS argument order: rows jb.. of the slice are
 * updated with minus L2 times the first jb rows of the slice.
 */
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Move to the next slice and probe the broadcast again.
 */
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
   else                        /* nprow > 1 ... */
   {
/*
 * Selection of the swapping algorithm - swap:broadcast U.  fswap and
 * tswap are function-static: they are read from PANEL->algo the first
 * time this branch runs and reused by every later call.
 * NOTE(review): this assumes the swap algorithm and threshold never
 * change between calls within one process - confirm.
 */
      if( fswap == HPL_NO_SWP )
      {
         fswap = PANEL->algo->fswap;
         tswap = PANEL->algo->fsthr;
      }
/*
 * HPL_SW_MIX picks the 01 variant only above the column threshold.
 * Both routines receive PBCST and the probe flag as well.
 */
      if( ( fswap == HPL_SWAP01 ) || ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
      {
         HPL_pdlaswp01N( PBCST, &test, PANEL, n );
      }
      else
      {
         HPL_pdlaswp00N( PBCST, &test, PANEL, n );
      }
/*
 * Compute redundantly row block of U and update trailing submatrix.
 * curr is nonzero on the process row owning the current panel; only
 * there are the first jb local rows excluded from the dgemm update
 * and overwritten with U afterwards.
 */
      nq0 = 0;
      curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
      Aptr = PANEL->A;
      L2ptr = PANEL->L2;
      L1ptr = PANEL->L1;
      Uptr = PANEL->U;
      ldl2 = PANEL->ldl2;
      mp = PANEL->mp - ( curr != 0 ? jb : 0 );
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
      Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, n );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * Broadcast has not occurred yet, splitting the computational part in
 * slices of at most nb columns with a broadcast probe after each one.
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0;
         nn = Mmin( nb, nn );
/*
 * Solve with the transpose of L1 in place in the jb-by-nn slice of U.
 */
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
/*
 * Store the jb-by-nn slice of U into the first jb rows of A
 * (presumably a plain copy, going by the HPL_dlacpy argument order).
 */
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
/*
 * U advances by nn columns, A by nn columns; then probe again.
 */
         Uptr = Mptr( Uptr, 0, nn, LDU );
         Aptr = Mptr( Aptr, 0, nn, lda );
         nq0 += nn;
         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update: all
 * remaining n - nq0 columns are processed in one step.
 */
      if( ( nn = n - nq0 ) > 0 )
      {
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans, HplUnit, jb, nn, HPL_rone, L1ptr, jb, Uptr, LDU );
         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Mptr( Aptr, jb, 0, lda ), lda );
#endif
            HPL_dlacpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, 0, nq0, jb, nn );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );
            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS, HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn, jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone, Aptr, lda );
#endif
         }
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Uv0 );
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
/*
 * Advance the panel past the n columns that were just updated.
 */
   PANEL->A = Mptr( PANEL->A, 0, n, lda );
   PANEL->nq -= n;
   PANEL->jj += n;
/*
 * return the outcome of the probe (should always be HPL_SUCCESS, the
 * panel broadcast is enforced in that routine).  IFLAG is not touched
 * when PBCST is NULL.
 */
   if( PBCST != NULL ) *IFLAG = test;
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
/*
 * End of HPL_pdupdateTN
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_pdupdateTT.c0000644000000000000000000003623111256503657014763 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3.
All advertising materials mentioning features or use of this
 * software must display the following acknowledgement:
 * This product includes software developed at the University of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the University, the name of the Laboratory, or the
 * names of its contributors may not be used to endorse or promote
 * products derived from this software without specific written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_pdupdateTT
(
   HPL_T_panel * PBCST,
   int * IFLAG,
   HPL_T_panel * PANEL,
   const int NN
)
#else
void HPL_pdupdateTT
( PBCST, IFLAG, PANEL, NN )
   HPL_T_panel * PBCST;
   int * IFLAG;
   HPL_T_panel * PANEL;
   const int NN;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_pdupdateTT broadcast - forward the panel PBCST and simultaneously
 * applies the row interchanges and updates part of the trailing (using
 * the panel PANEL) submatrix.
 *
 * The update is performed nb columns at a time for as long as the
 * broadcast probe reports HPL_KEEP_TESTING; once the probe reports
 * anything else, the remaining columns are updated in a single step.
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         On entry, PBCST points to the data structure containing the
 *         panel (to be broadcast) information. PBCST may be NULL.
 *
 * IFLAG   (local output)                int *
 *         When PBCST is not NULL on entry, IFLAG receives on exit the
 *         outcome of the last broadcast probe. When PBCST is NULL,
 *         IFLAG is left unchanged (it is never written in that case).
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry, PANEL points to the data structure containing the
 *         panel (to be updated) information. On exit, PANEL->A,
 *         PANEL->nq and PANEL->jj have been advanced past the n
 *         columns that were updated.
 *
 * NN      (local input)                 const int
 *         On entry, NN specifies the local number of columns of the
 *         trailing submatrix to be updated starting at the current
 *         position. When NN is at least zero, min( NN, PANEL->nq )
 *         columns are updated; a negative NN leaves the column count
 *         at PANEL->nq.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   double * Aptr, * L1ptr, * L2ptr, * Uptr, * dpiv;
   int * ipiv;
#ifdef HPL_CALL_VSIPL
   vsip_mview_d * Av0, * Av1, * Lv0, * Lv1, * Uv0, * Uv1;
#endif
   int curr, i, iroff, jb, lda, ldl2, mp, n, nb, nq0, nn, test;
/*
 * The swapping algorithm and its threshold are read from PANEL->algo
 * the first time the nprow > 1 branch runs (while fswap still holds
 * HPL_NO_SWP) and are then kept in these statics across calls.
 */
   static int tswap = 0;
   static HPL_T_SWAP fswap = HPL_NO_SWP;
/*
 * LDU is the leading dimension used for PANEL->U in this routine: the
 * number n of columns being updated. The macro is not #undef'ed below.
 */
#define LDU n
/* ..
 * .. Executable Statements ..
 */
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
   nb = PANEL->nb; jb = PANEL->jb; n = PANEL->nq; lda = PANEL->lda;
   if( NN >= 0 ) n = Mmin( NN, n );
/*
 * There is nothing to update, enforce the panel broadcast.
 */
   if( ( n <= 0 ) || ( jb <= 0 ) )
   {
      if( PBCST != NULL )
      {
         /* Keep probing until the broadcast reports completion. */
         do { (void) HPL_bcast( PBCST, IFLAG ); }
         while( *IFLAG != HPL_SUCCESS );
      }
#ifdef HPL_DETAILED_TIMING
      HPL_ptimer( HPL_TIMING_UPDATE );
#endif
      return;
   }
/*
 * Enable/disable the column panel probing mechanism
 *
 * NOTE(review): HPL_bcast is also called when PBCST is NULL; presumably
 * it then sets test to HPL_SUCCESS so that the probing loops below are
 * skipped - confirm in HPL_bcast.
 */
   (void) HPL_bcast( PBCST, &test );
/*
 * 1 x Q case
 */
   if( PANEL->grid->nprow == 1 )
   {
      Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1;
      ldl2 = PANEL->ldl2; dpiv = PANEL->DPIV; ipiv = PANEL->IWORK;
      mp = PANEL->mp - jb; iroff = PANEL->ii; nq0 = 0;
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * The pivots are stored as doubles in PANEL->DPIV; convert them to int
 * and make them relative to the local row offset PANEL->ii.
 */
      for( i = 0; i < jb; i++ ) { ipiv[i] = (int)(dpiv[i]) - iroff; }
/*
 * So far we have not updated anything - test availability of the panel
 * to be forwarded - If detected forward it and finish the update in one
 * step.
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0; nn = Mmin( nb, nn );
/*
 * Update nb columns at a time
 */
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );

         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
                      HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
                    Mptr( Aptr, jb, 0, lda ), lda );
#endif
         /* Move on to the next nn columns, then probe the broadcast again. */
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;

         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update
 */
      if( ( nn = n - nq0 ) > 0 )
      {
#ifdef HPL_DETAILED_TIMING
         HPL_ptimer( HPL_TIMING_LASWP );
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
         HPL_ptimer( HPL_TIMING_LASWP );
#else
         HPL_dlaswp00N( jb, nn, Aptr, lda, ipiv );
#endif
         HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper, HplTrans,
                    HplUnit, jb, nn, HPL_rone, L1ptr, jb, Aptr, lda );
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
         Uv1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, jb, nn );
         Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );

         vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_NTRANS,
                      HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
         (void) vsip_mdestroy_d( Av1 );
         (void) vsip_mdestroy_d( Uv1 );
#else
         HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans, mp, nn,
                    jb, -HPL_rone, L2ptr, ldl2, Aptr, lda, HPL_rone,
                    Mptr( Aptr, jb, 0, lda ), lda );
#endif
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
   else /* nprow > 1 ... */
   {
/*
 * Selection of the swapping algorithm - swap:broadcast U.
 */
      if( fswap == HPL_NO_SWP )
      { fswap = PANEL->algo->fswap; tswap = PANEL->algo->fsthr; }

      if( ( fswap == HPL_SWAP01 ) ||
          ( ( fswap == HPL_SW_MIX ) && ( n > tswap ) ) )
      { HPL_pdlaswp01T( PBCST, &test, PANEL, n ); }
      else
      { HPL_pdlaswp00T( PBCST, &test, PANEL, n ); }
/*
 * Compute redundantly row block of U and update trailing submatrix
 *
 * curr is 1 when this process row is PANEL->prow. In that case the
 * first jb local rows are excluded from the mp rows updated by the
 * matrix product, and the computed block of U is copied back into A
 * with HPL_dlatcpy.
 */
      nq0 = 0; curr = ( PANEL->grid->myrow == PANEL->prow ? 1 : 0 );
      Aptr = PANEL->A; L2ptr = PANEL->L2; L1ptr = PANEL->L1;
      Uptr = PANEL->U; ldl2 = PANEL->ldl2;
      mp = PANEL->mp - ( curr != 0 ? jb : 0 );
#ifdef HPL_CALL_VSIPL
/*
 * Admit the blocks
 */
      (void) vsip_blockadmit_d( PANEL->Ablock, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->L2block, VSIP_TRUE );
      (void) vsip_blockadmit_d( PANEL->Ublock, VSIP_TRUE );
/*
 * Create the matrix views
 */
      Av0 = vsip_mbind_d( PANEL->Ablock, 0, 1, lda, lda, PANEL->pmat->nq );
      Lv0 = vsip_mbind_d( PANEL->L2block, 0, 1, ldl2, ldl2, jb );
      Uv0 = vsip_mbind_d( PANEL->Ublock, 0, 1, LDU, LDU, jb );
/*
 * Create the matrix subviews
 */
      Lv1 = vsip_msubview_d( Lv0, 0, 0, mp, jb );
#endif
/*
 * Broadcast has not occurred yet, splitting the computational part
 */
      while ( test == HPL_KEEP_TESTING )
      {
         nn = n - nq0; nn = Mmin( nb, nn );

         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );

         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );

            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
                         HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
                       Mptr( Aptr, jb, 0, lda ), lda );
#endif
            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );

            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
                         HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
                       Aptr, lda );
#endif
         }
         /*
          * U is stepped by nn in the row argument of Mptr while A is
          * stepped by nn in the column argument.
          */
         Uptr = Mptr( Uptr, nn, 0, LDU );
         Aptr = Mptr( Aptr, 0, nn, lda ); nq0 += nn;

         (void) HPL_bcast( PBCST, &test );
      }
/*
 * The panel has been forwarded at that point, finish the update
 */
      if( ( nn = n - nq0 ) > 0 )
      {
         HPL_dtrsm( HplColumnMajor, HplRight, HplUpper, HplNoTrans,
                    HplUnit, nn, jb, HPL_rone, L1ptr, jb, Uptr, LDU );

         if( curr != 0 )
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii+jb, PANEL->jj+nq0, mp, nn );

            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
                         HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
                       Mptr( Aptr, jb, 0, lda ), lda );
#endif
            HPL_dlatcpy( jb, nn, Uptr, LDU, Aptr, lda );
         }
         else
         {
#ifdef HPL_CALL_VSIPL
/*
 * Create the matrix subviews
 */
            Uv1 = vsip_msubview_d( Uv0, nq0, 0, nn, jb );
            Av1 = vsip_msubview_d( Av0, PANEL->ii, PANEL->jj+nq0, mp, nn );

            vsip_gemp_d( -HPL_rone, Lv1, VSIP_MAT_NTRANS, Uv1, VSIP_MAT_TRANS,
                         HPL_rone, Av1 );
/*
 * Destroy the matrix subviews
 */
            (void) vsip_mdestroy_d( Av1 );
            (void) vsip_mdestroy_d( Uv1 );
#else
            HPL_dgemm( HplColumnMajor, HplNoTrans, HplTrans, mp, nn,
                       jb, -HPL_rone, L2ptr, ldl2, Uptr, LDU, HPL_rone,
                       Aptr, lda );
#endif
         }
      }
#ifdef HPL_CALL_VSIPL
/*
 * Destroy the matrix subviews
 */
      (void) vsip_mdestroy_d( Lv1 );
/*
 * Release the blocks
 */
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Uv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Lv0 ), VSIP_TRUE );
      (void) vsip_blockrelease_d( vsip_mgetblock_d( Av0 ), VSIP_TRUE );
/*
 * Destroy the matrix views
 */
      (void) vsip_mdestroy_d( Uv0 );
      (void) vsip_mdestroy_d( Lv0 );
      (void) vsip_mdestroy_d( Av0 );
#endif
   }
/*
 * Advance the panel past the n columns that have just been updated.
 */
   PANEL->A = Mptr( PANEL->A, 0, n, lda ); PANEL->nq -= n; PANEL->jj += n;
/*
 * return the outcome of the probe (should always be HPL_SUCCESS, the
 * panel broadcast is enforced in that routine).
 */
   if( PBCST != NULL ) *IFLAG = test;
#ifdef HPL_DETAILED_TIMING
   HPL_ptimer( HPL_TIMING_UPDATE );
#endif
/*
 * End of HPL_pdupdateTT
 */
}
hpcc-1.4.1/hpl/src/pgesv/HPL_perm.c0000644000000000000000000001354111256503657013647 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 * HPL - 2.0 - September 10, 2008
 * Antoine P. Petitet
 * University of Tennessee, Knoxville
 * Innovative Computing Laboratory
 * (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions, and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3.
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_perm ( const int N, int * LINDXA, int * LINDXAU, int * IWORK ) #else void HPL_perm ( N, LINDXA, LINDXAU, IWORK ) const int N; int * LINDXA; int * LINDXAU; int * IWORK; #endif { /* * Purpose * ======= * * HPL_perm combines two index arrays and generate the corresponding * permutation. First, this function computes the inverse of LINDXA, and * then combine it with LINDXAU. Second, in order to be able to perform * the permutation in place, LINDXAU is overwritten by the sequence of * permutation producing the same result. 
What we ultimately want to * achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the * call to this function, this in place permutation can be performed by * for i in [0..N) swap U[i] with U[LINDXAU[i]]. * * Arguments * ========= * * N (global input) const int * On entry, N specifies the length of the arrays LINDXA and * LINDXAU. N should be at least zero. * * LINDXA (global input/output) int * * On entry, LINDXA is an array of dimension N containing the * source indexes. On exit, LINDXA contains the combined index * array. * * LINDXAU (global input/output) int * * On entry, LINDXAU is an array of dimension N containing the * target indexes. On exit, LINDXAU contains the sequence of * permutation, that should be applied in increasing order to * permute the underlying array U in place. * * IWORK (workspace) int * * On entry, IWORK is a workarray of dimension N. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int i, j, k, fndd; /* .. * .. Executable Statements .. */ /* * Inverse LINDXA - combine LINDXA and LINDXAU - Initialize IWORK */ for( i = 0; i < N; i++ ) { IWORK[LINDXA[i]] = i; } for( i = 0; i < N; i++ ) { LINDXA[i] = LINDXAU[IWORK[i]]; IWORK[i] = i; } for( i = 0; i < N; i++ ) { /* search LINDXA such that LINDXA[j] == i */ j = 0; do { fndd = ( LINDXA[j] == i ); j++; } while( !fndd ); j--; /* search IWORK such that IWORK[k] == j */ k = 0; do { fndd = ( IWORK[k] == j ); k++; } while( !fndd ); k--; /* swap IWORK[i] and IWORK[k]; LINDXAU[i] = k */ j = IWORK[i]; IWORK[i] = IWORK[k]; IWORK[k] = j; LINDXAU[i] = k; } /* * End of HPL_perm */ } hpcc-1.4.1/hpl/src/pgesv/HPL_pipid.c0000644000000000000000000002030311256503657014003 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pipid ( HPL_T_panel * PANEL, int * K, int * IPID ) #else void HPL_pipid ( PANEL, K, IPID ) HPL_T_panel * PANEL; int * K; int * IPID; #endif { /* * Purpose * ======= * * HPL_pipid computes an array IPID that contains the source and final * destination of matrix rows resulting from the application of N * interchanges as computed by the LU factorization with row partial * pivoting. The array IPID is such that the row of global index IPID(i) * should be mapped onto the row of global index IPID(i+1). Note that we * cannot really know the length of IPID a priori. However, we know that * this array is at least 2*N long, since there are N rows to swap and * broadcast. The length of this array must be smaller than or equal to * 4*N, since every row is swapped with at most a single distinct remote * row. The algorithm constructing IPID goes as follows: Let IA be the * global index of the first row to be swapped. * * For every row src IA + i with i in [0..N) to be swapped with row dst * such that dst is given by DPIV[i]: * * Is row src the destination of a previous row of the current block, * that is, is there k odd such that IPID(k) is equal to src ? * Yes: update this destination with dst. For example, if the * pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, * we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it * was thought so far ... * No : add the pair (src,dst) at the end of IPID; row src has not * been moved yet. * * Is row dst different from src the destination of a previous row of * the current block, i.e., is there k odd such that IPID(k) is equal to * dst ? * Yes: update IPID(k) with src. For example, if the pivot array * is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in * fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought * so far ... 
* No : add the pair (dst,src) at the end of IPID; row dst has not * been moved yet. * * Note that when src is equal to dst, the pair (dst,src) should not be * added to IPID in order to avoid duplicated entries in this array. * During the construction of the array IPID, we make sure that the * first N entries are such that IPID(k) with k odd is equal to IA+k/2. * For k in [0..K/2), the row of global index IPID(2*k) should be * mapped onto the row of global index IPID(2*k+1). * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * K (global output) int * * On exit, K specifies the number of entries in IPID. K is at * least 2*N, and at most 4*N. * * IPID (global output) int * * On entry, IPID is an array of length 4*N. On exit, the first * K entries of that array contain the src and final destination * resulting from the application of the N interchanges as * specified by DPIV. The pairs (src,dst) are contiguously * stored and sorted so that IPID(2*i+1) is equal to IA+i with i * in [0..N) * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int dst, fndd, fnds, ia, i, j, jb, lst, off, src; double * dpiv; /* .. * .. Executable Statements .. 
*/ dpiv = PANEL->DPIV; jb = PANEL->jb; src = ia = PANEL->ia; dst = (int)(dpiv[0]); IPID[0] = dst; IPID[1] = src; *K = 2; if( src != dst ) { IPID[2] = src; IPID[3] = dst; *K += 2; } for( i = 1; i < jb; i++ ) { fnds = 0; j = 1; if( ( src = ia + i ) == ( dst = (int)(dpiv[i]) ) ) { do { if( src == IPID[j] ) { fnds = j; } else { j += 2; } } while( !( fnds ) && ( j < *K ) ); if( !fnds ) { lst = *K; off = 2; IPID[lst] = src; } else { lst = fnds-1; off = 0; } IPID[lst+1] = dst; } else { fndd = 0; do { if ( src == IPID[j] ) { fnds = j; } else if( dst == IPID[j] ) { fndd = j; } j += 2; } while( ( !( fnds ) || !( fndd ) ) && ( j < *K ) ); if( !fnds ) { IPID[*K] = src; IPID[*K+1] = dst; off = 2; } else { IPID[fnds] = dst; off = 0; } if( !fndd ) { lst = *K+off; IPID[lst ] = dst; off += 2; } else { lst = fndd-1; } IPID[lst+1] = src; } /* * Enforce IPID(1,i) equal to src = ia + i */ if( lst != ( j = ( i << 1 ) ) ) { src = IPID[j ]; IPID[j ] = IPID[lst ]; IPID[lst ] = src; dst = IPID[j+1]; IPID[j+1] = IPID[lst+1]; IPID[lst+1] = dst; } *K += off; } /* * End of HPL_pipid */ } hpcc-1.4.1/hpl/src/pgesv/HPL_plindx0.c0000644000000000000000000003047311256503657014265 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_plindx0 ( HPL_T_panel * PANEL, const int K, int * IPID, int * LINDXA, int * LINDXAU, int * LLEN ) #else void HPL_plindx0 ( PANEL, K, IPID, LINDXA, LINDXAU, LLEN ) HPL_T_panel * PANEL; const int K; int * IPID; int * LINDXA; int * LINDXAU; int * LLEN; #endif { /* * Purpose * ======= * * HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing * the local source and final destination position resulting from the * application of row interchanges. * * On entry, the array IPID of length K is such that the row of global * index IPID(i) should be mapped onto row of global index IPID(i+1). 
* Let IA be the global index of the first row to be swapped. For k in * [0..K/2), the row of global index IPID(2*k) should be mapped onto the * row of global index IPID(2*k+1). The question then, is to determine * which rows should ultimately be part of U. * * First, some rows of the process ICURROW may be swapped locally. One * of this row belongs to U, the other one belongs to my local piece of * A. The other rows of the current block are swapped with remote rows * and are thus not part of U. These rows however should be sent along, * and grabbed by the other processes as we progress in the exchange * phase. * * So, assume that I am ICURROW and consider a row of index IPID(2*i) * that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less * than N, this row is locally swapped and should be copied into U at * the position IPID(2*i+1) - IA. No row will be exchanged for this one. * If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be * locally copied into my local piece of A at the position corresponding * to the row of global index IPID(2*i+1). * * If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) * is to be swapped away and strictly speaking does not belong to U, but * to A remotely. Since this process will however send this array U, * this row is copied into U, exactly where the row IPID(2*i+1) should * go. For this, we search IPID for k1, such that IPID(2*k1) is equal to * IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position * IPID(2*k1+1)-IA. * * It is thus important to put the rows that go into U, i.e., such that * IPID(2*i+1) - IA is less than N at the begining of the array IPID. By * doing so, U is formed, and the local copy is performed in just one * sweep. * * Two lists LINDXA and LINDXAU are built. LINDXA contains the local * index of the rows I have that should be copied. 
LINDXAU contains the * local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A * is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) * of A should be locally copied into A(-LINDXAU(k),:). In the process * ICURROW, the initial packing algorithm proceeds as follows. * * for all entries in IPID, * if IPID(2*i) is in ICURROW, * if IPID(2*i+1) is in ICURROW, * if( IPID(2*i+1) - IA < N ) * save corresponding local position * of this row (LINDXA); * save local position (LINDXAU) in U * where this row goes; * [copy row IPID(2*i) in U at position * IPID(2*i+1)-IA; ]; * else * save corresponding local position of * this row (LINDXA); * save local position (-LINDXAU) in A * where this row goes; * [copy row IPID(2*i) in my piece of A * at IPID(2*i+1);] * end if * else * find k1 such that IPID(2*k1) = IPID(2*i+1); * copy row IPID(2*i) in U at position * IPID(2*k1+1)-IA; * save corresponding local position of this * row (LINDXA); * save local position (LINDXAU) in U where * this row goes; * end if * end if * end for * * Second, if I am not the current row process ICURROW, all source rows * in IPID that I own are part of U. Indeed, they are swapped with one * row of the current block of rows, and the main factorization * algorithm proceeds one row after each other. The processes different * from ICURROW, should exchange and accumulate those rows until they * receive some data previously owned by the process ICURROW. * * In processes different from ICURROW, the initial packing algorithm * proceeds as follows. Consider a row of global index IPID(2*i) that I * own. When I will be receiving data previously owned by ICURROW, i.e., * U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, * and this particular row of U should be first copied into my piece of * A, at A(il,:), where il is the local row index corresponding to * IPID(2*i). Now,initially, this row will be packed into workspace, say * as the kth row of that work array. 
The following algorithm sets * LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row * should be copied. LINDXA(k) stores the local index in A where this * row of U should be copied, i.e il. * * for all entries in IPID, * if IPID(2*i) is not in ICURROW, * copy row IPID(2*i) in work array; * save corresponding local position * of this row (LINDXA); * save position (LINDXAU) in U where * this row should be copied; * end if * end for * * Since we are at it, we also globally figure out how many rows every * process has. That is necessary, because it would rather be cumbersome * to figure it on the fly during the bi-directional exchange phase. * This information is kept in the array LLEN of size NPROW. Also note * that the arrays LINDXA and LINDXAU are of max length equal to 2*N. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * K (global input) const int * On entry, K specifies the number of entries in IPID. K is at * least 2*N, and at most 4*N. * * IPID (global input) int * * On entry, IPID is an array of length K. The first K entries * of that array contain the src and final destination resulting * from the application of the interchanges. * * LINDXA (local output) int * * On entry, LINDXA is an array of dimension 2*N. On exit, this * array contains the local indexes of the rows of A I have that * should be copied into U. * * LINDXAU (local output) int * * On exit, LINDXAU is an array of dimension 2*N. On exit, this * array contains the local destination information encoded as * follows. If LINDXAU(k) >= 0, row LINDXA(k) of A is to be * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) * of A should be locally copied into A(-LINDXAU(k),:). * * LLEN (global output) int * * On entry, LLEN is an array of length NPROW. On exit, it * contains how many rows every process has. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int dst, dstrow, fndd, i, ia, icurrow, il, ip=0, iroff, j, jb, myrow, nb, nprow, src, srcrow; /* .. * .. Executable Statements .. */ /* * Compute the local arrays LINDXA and LINDXAU containing the local * source and final destination position resulting from the application * of N interchanges. */ myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; icurrow = PANEL->prow; jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; iroff = PANEL->ii; for( i = 0; i < nprow; i++ ) LLEN[i] = 0; for( i = 0; i < K; i += 2 ) { src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); LLEN[ srcrow ]++; if( myrow == srcrow ) { Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); LINDXA[ip] = il - iroff; dst = IPID[i+1]; if( myrow == icurrow ) { Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); if( dstrow == icurrow ) { if( dst - ia < jb ) { LINDXAU[ip] = dst - ia; } else { Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); LINDXAU[ip] = iroff - il; } } else { j = 0; do { fndd = ( dst == IPID[j] ); j+=2; } while( !fndd && ( j < K ) ); LINDXAU[ip] = IPID[j-1] - ia; } } else { LINDXAU[ip] = dst - ia; } ip++; } } /* * End of HPL_plindx0 */ } hpcc-1.4.1/hpl/src/pgesv/HPL_plindx1.c0000644000000000000000000002662111256503657014266 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_plindx1 ( HPL_T_panel * PANEL, const int K, const int * IPID, int * IPA, int * LINDXA, int * LINDXAU, int * IPLEN, int * IPMAP, int * IPMAPM1, int * PERMU, int * IWORK ) #else void HPL_plindx1 ( PANEL, K, IPID, IPA, LINDXA, LINDXAU, IPLEN, IPMAP, IPMAPM1, PERMU, IWORK ) HPL_T_panel * PANEL; const int K; const int * IPID; int * IPA; int * LINDXA; int * LINDXAU; int * IPLEN; int * IPMAP; int * IPMAPM1; int * PERMU; int * IWORK; #endif { /* * Purpose * ======= * * HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing * the local source and final destination position resulting from the * application of row interchanges. In addition, this function computes * three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic * mapping information for the spreading phase. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * K (global input) const int * On entry, K specifies the number of entries in IPID. K is at * least 2*N, and at most 4*N. * * IPID (global input) const int * * On entry, IPID is an array of length K. The first K entries * of that array contain the src and final destination resulting * from the application of the interchanges. * * IPA (global output) int * * On exit, IPA specifies the number of rows that the current * process row has that either belong to U or should be swapped * with remote rows of A. * * LINDXA (global output) int * * On entry, LINDXA is an array of dimension 2*N. On exit, this * array contains the local indexes of the rows of A I have that * should be copied into U. * * LINDXAU (global output) int * * On exit, LINDXAU is an array of dimension 2*N. On exit, this * array contains the local destination information encoded as * follows. 
If LINDXAU(k) >= 0, row LINDXA(k) of A is to be * copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) * of A should be locally copied into A(-LINDXAU(k),:). * * IPLEN (global output) int * * On entry, IPLEN is an array of dimension NPROW + 1. On exit, * this array is such that IPLEN[i] is the number of rows of A * in the processes before process IPMAP[i] after the sort * with the convention that IPLEN[nprow] is the total number of * rows of the panel. In other words IPLEN[i+1]-IPLEN[i] is the * local number of rows of A that should be moved to the process * IPMAP[i]. IPLEN is such that the number of rows of the source * process row can be computed as IPLEN[1] - IPLEN[0], and the * remaining entries of this array are sorted so that the * quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted. * * IPMAP (global output) int * * On entry, IPMAP is an array of dimension NPROW. On exit, this * array contains the logarithmic mapping of the processes. In * other words, IPMAP[myrow] is the corresponding sorted process * coordinate. * * IPMAPM1 (global output) int * * On entry, IPMAPM1 is an array of dimension NPROW. On exit, * this array contains the inverse of the logarithmic mapping * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in * [0.. NPROCS) * * PERMU (global output) int * * On entry, PERMU is an array of dimension JB. On exit, PERMU * contains a sequence of permutations, that should be applied * in increasing order to permute in place the row panel U. * * IWORK (workspace) int * * On entry, IWORK is a workarray of dimension 2*JB. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int * iwork; int dst, dstrow, fndd, i, ia, icurrow, il, ip, ipU, iroff, j, jb, myrow, nb, nprow, src, srcrow; /* .. * .. Executable Statements .. 
*/ /* * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 */ HPL_plindx10( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ); /* * Compute the local arrays LINDXA and LINDXAU containing the local * source and final destination position resulting from the application * of N interchanges. Compute LINDXA and LINDXAU in icurrow, and LINDXA * elsewhere and PERMU in every process. */ myrow = PANEL->grid->myrow; nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; iroff = PANEL->ii; icurrow = PANEL->prow; iwork = IWORK + jb; if( myrow == icurrow ) { for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) { src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); if( srcrow == icurrow ) { dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); Mindxg2l( il, src, nb, nb, myrow, 0, nprow ); LINDXA[ip] = il - iroff; if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) { PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; IPLEN[il]++; ipU++; } else if( dstrow != icurrow ) { j = 0; do { fndd = ( dst == IPID[j] ); j+=2; } while( !fndd && ( j < K ) ); PERMU[ipU] = IPID[j-1]-ia; il = IPMAPM1[dstrow]; j = IPLEN[il]; iwork[ipU] = LINDXAU[ip] = j; IPLEN[il]++; ipU++; } else if( ( dstrow == icurrow ) && ( dst - ia >= jb ) ) { Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); LINDXAU[ip] = iroff - il; } ip++; } } *IPA = ip; } else { for( i = 0, ip = 0, ipU = 0; i < K; i += 2 ) { src = IPID[i ]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); /* * LINDXA[i] is the local index of the row of A that belongs into U */ if( myrow == dstrow ) { Mindxg2l( il, dst, nb, nb, myrow, 0, nprow ); LINDXA[ip] = il - iroff; ip++; } /* * iwork[i] is the local (current) position index in U * PERMU[i] is the local (final) destination index in U */ if( srcrow == icurrow ) { if( ( dstrow == icurrow ) && ( dst - ia < jb ) ) { PERMU[ipU] = dst - ia; il = IPMAPM1[dstrow]; iwork[ipU] = IPLEN[il]; 
IPLEN[il]++; ipU++; } else if( dstrow != icurrow ) { j = 0; do { fndd = ( dst == IPID[j] ); j+=2; } while( !fndd && ( j < K ) ); PERMU[ipU] = IPID[j-1] - ia; il = IPMAPM1[dstrow]; iwork[ipU] = IPLEN[il]; IPLEN[il]++; ipU++; } } } *IPA = 0; } /* * Simplify iwork and PERMU, return in PERMU the sequence of permutation * that need to be apply to U after it has been broadcast. */ HPL_perm( jb, iwork, PERMU, IWORK ); /* * Reset IPLEN to its correct value */ for( i = nprow; i > 0; i-- ) IPLEN[i] = IPLEN[i-1]; IPLEN[0] = 0; /* * End of HPL_plindx1 */ } hpcc-1.4.1/hpl/src/pgesv/HPL_plindx10.c0000644000000000000000000001562211256503657014345 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_plindx10 ( HPL_T_panel * PANEL, const int K, const int * IPID, int * IPLEN, int * IPMAP, int * IPMAPM1 ) #else void HPL_plindx10 ( PANEL, K, IPID, IPLEN, IPMAP, IPMAPM1 ) HPL_T_panel * PANEL; const int K; const int * IPID; int * IPLEN; int * IPMAP; int * IPMAPM1; #endif { /* * Purpose * ======= * * HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that * contain the logarithmic mapping information for the spreading phase. * * Arguments * ========= * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel information. * * K (global input) const int * On entry, K specifies the number of entries in IPID. K is at * least 2*N, and at most 4*N. * * IPID (global input) const int * * On entry, IPID is an array of length K. The first K entries * of that array contain the src and final destination resulting * from the application of the interchanges. * * IPLEN (global output) int * * On entry, IPLEN is an array of dimension NPROW + 1. 
On exit, * this array is such that IPLEN[i] is the number of rows of A * in the processes before process IMAP[i] after the sort, with * the convention that IPLEN[nprow] is the total number of rows. * In other words, IPLEN[i+1] - IPLEN[i] is the local number of * rows of A that should be moved for each process. IPLEN is * such that the number of rows of the source process row can be * computed as IPLEN[1] - IPLEN[0], and the remaining entries of * this array are sorted so that the quantities IPLEN[i+1] - * IPLEN[i] are logarithmically sorted. * * IPMAP (global output) int * * On entry, IPMAP is an array of dimension NPROW. On exit, this * array contains the logarithmic mapping of the processes. In * other words, IPMAP[myrow] is the corresponding sorted process * coordinate. * * IPMAPM1 (global output) int * * On entry, IPMAPM1 is an array of dimension NPROW. On exit, * this array contains the inverse of the logarithmic mapping * contained in IPMAP: IPMAPM1[ IPMAP[i] ] = i, for all i in * [0.. NPROW) * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int dst, dstrow, i, ia, icurrow, jb, nb, nprow, src, srcrow; /* .. * .. Executable Statements .. */ nprow = PANEL->grid->nprow; jb = PANEL->jb; nb = PANEL->nb; ia = PANEL->ia; icurrow = PANEL->prow; /* * Compute redundantly the local number of rows that each process has * and that belong to U in IPLEN[1 .. 
nprow+1] */ for( i = 0; i <= nprow; i++ ) IPLEN[i] = 0; for( i = 0; i < K; i += 2 ) { src = IPID[i]; Mindxg2p( src, nb, nb, srcrow, 0, nprow ); if( srcrow == icurrow ) { dst = IPID[i+1]; Mindxg2p( dst, nb, nb, dstrow, 0, nprow ); if( ( dstrow != srcrow ) || ( dst - ia < jb ) ) IPLEN[dstrow+1]++; } } /* * Logarithmic sort of the processes - compute IPMAP, IPLEN and IPMAPM1 * (the inverse of IPMAP) */ HPL_logsort( nprow, icurrow, IPLEN, IPMAP, IPMAPM1 ); /* * End of HPL_plindx10 */ } hpcc-1.4.1/hpl/src/pgesv/HPL_rollN.c0000644000000000000000000002220011256503657013762 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #define I_SEND 0 #define I_RECV 1 #ifdef HPL_STDC_HEADERS void HPL_rollN ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int N, double * U, const int LDU, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 ) #else void HPL_rollN ( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const int N; double * U; const int LDU; const int * IPLEN; const int * IPMAP; const int * IPMAPM1; #endif { /* * Purpose * ======= * * HPL_rollN rolls the local arrays containing the local pieces of U, so * that on exit to this function U is replicated in every process row. * In addition, this function probe for the presence of the column panel * and forwards it when available. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. * * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. 
If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel (to be rolled) information. * * N (local input) const int * On entry, N specifies the number of columns of U. N must be * at least zero. * * U (local input/output) double * * On entry, U is an array of dimension (LDU,*) containing the * local pieces of U in each process row. * * LDU (local input) const int * On entry, LDU specifies the local leading dimension of U. LDU * should be at least MAX(1,IPLEN[NPROW]). * * IPLEN (global input) const int * * On entry, IPLEN is an array of dimension NPROW+1. This array * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U * in each process row. * * IPMAP (global input) const int * * On entry, IMAP is an array of dimension NPROW. This array * contains the logarithmic mapping of the processes. In other * words, IMAP[myrow] is the absolute coordinate of the sorted * process. * * IPMAPM1 (global input) const int * * On entry, IMAPM1 is an array of dimension NPROW. This array * contains the inverse of the logarithmic mapping contained in * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ MPI_Datatype type[2]; MPI_Status status; MPI_Request request; MPI_Comm comm; int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, ierr=MPI_SUCCESS, il, k, l, lengthR, lengthS, mydist, myrow, next, npm1, nprow, partner, prev; /* .. * .. Executable Statements .. 
*/ if( N <= 0 ) return; npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; comm = PANEL->grid->col_comm; /* * Rolling phase */ mydist = IPMAPM1[myrow]; prev = IPMAP[MModSub1( mydist, nprow )]; next = IPMAP[MModAdd1( mydist, nprow )]; for( k = 0; k < npm1; k++ ) { l = (int)( (unsigned int)(k) >> 1 ); if( ( ( mydist + k ) & 1 ) != 0 ) { il = MModAdd( mydist, l, nprow ); lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); il = MModSub( mydist, l+1, nprow ); lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; } else { il = MModSub( mydist, l, nprow ); lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); il = MModAdd( mydist, l+1, nprow ); lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; } if( lengthR > 0 ) { if( ierr == MPI_SUCCESS ) ierr = MPI_Type_vector( N, lengthR, LDU, MPI_DOUBLE, &type[I_RECV] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[I_RECV] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Irecv( Mptr( U, ibufR, 0, LDU ), 1, type[I_RECV], partner, Cmsgid, comm, &request ); } if( lengthS > 0 ) { if( ierr == MPI_SUCCESS ) ierr = MPI_Type_vector( N, lengthS, LDU, MPI_DOUBLE, &type[I_SEND] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[I_SEND] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Send( Mptr( U, ibufS, 0, LDU ), 1, type[I_SEND], partner, Cmsgid, comm ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[I_SEND] ); } if( lengthR > 0 ) { if( ierr == MPI_SUCCESS ) ierr = MPI_Wait( &request, &status ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[I_RECV] ); } /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); } if( ierr != MPI_SUCCESS ) { HPL_pabort( __LINE__, "HPL_rollN", "MPI call failed" ); } /* * End of HPL_rollN */ } hpcc-1.4.1/hpl/src/pgesv/HPL_rollT.c0000644000000000000000000002404711256503657014003 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #define I_SEND 0 #define I_RECV 1 #ifdef HPL_STDC_HEADERS void HPL_rollT ( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int N, double * U, const int LDU, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 ) #else void HPL_rollT ( PBCST, IFLAG, PANEL, N, U, LDU, IPLEN, IPMAP, IPMAPM1 ) HPL_T_panel * PBCST; int * IFLAG; HPL_T_panel * PANEL; const int N; double * U; const int LDU; const int * IPLEN; const int * IPMAP; const int * IPMAPM1; #endif { /* * Purpose * ======= * * HPL_rollT rolls the local arrays containing the local pieces of U, so * that on exit to this function U is replicated in every process row. * In addition, this function probe for the presence of the column panel * and forwards it when available. * * Arguments * ========= * * PBCST (local input/output) HPL_T_panel * * On entry, PBCST points to the data structure containing the * panel (to be broadcast) information. * * IFLAG (local input/output) int * * On entry, IFLAG indicates whether or not the broadcast has * already been completed. If not, probing will occur, and the * outcome will be contained in IFLAG on exit. * * PANEL (local input/output) HPL_T_panel * * On entry, PANEL points to the data structure containing the * panel (to be rolled) information. * * N (local input) const int * On entry, N specifies the local number of rows of U. N must * be at least zero. * * U (local input/output) double * * On entry, U is an array of dimension (LDU,*) containing the * local pieces of U in each process row. * * LDU (local input) const int * On entry, LDU specifies the local leading dimension of U. LDU * should be at least MAX(1,N). * * IPLEN (global input) const int * * On entry, IPLEN is an array of dimension NPROW+1. This array * is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U * in each process row. 
* * IPMAP (global input) const int * * On entry, IMAP is an array of dimension NPROW. This array * contains the logarithmic mapping of the processes. In other * words, IMAP[myrow] is the absolute coordinate of the sorted * process. * * IPMAPM1 (global input) const int * * On entry, IMAPM1 is an array of dimension NPROW. This array * contains the inverse of the logarithmic mapping contained in * IMAP: For i in [0.. NPROW) IMAPM1[IMAP[i]] = i. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #if 0 MPI_Datatype type[2]; #endif MPI_Status status; MPI_Request request; MPI_Comm comm; int Cmsgid=MSGID_BEGIN_PFACT, ibufR, ibufS, ierr=MPI_SUCCESS, il, k, l, lengthR, lengthS, mydist, myrow, next, npm1, nprow, partner, prev; /* .. * .. Executable Statements .. */ if( N <= 0 ) return; npm1 = ( nprow = PANEL->grid->nprow ) - 1; myrow = PANEL->grid->myrow; comm = PANEL->grid->col_comm; /* * Rolling phase */ mydist = IPMAPM1[myrow]; prev = IPMAP[MModSub1( mydist, nprow )]; next = IPMAP[MModAdd1( mydist, nprow )]; for( k = 0; k < npm1; k++ ) { l = (int)( (unsigned int)(k) >> 1 ); if( ( ( mydist + k ) & 1 ) != 0 ) { il = MModAdd( mydist, l, nprow ); lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); il = MModSub( mydist, l+1, nprow ); lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = prev; } else { il = MModSub( mydist, l, nprow ); lengthS = IPLEN[il+1] - ( ibufS = IPLEN[il] ); il = MModAdd( mydist, l+1, nprow ); lengthR = IPLEN[il+1] - ( ibufR = IPLEN[il] ); partner = next; } if( lengthR > 0 ) { #if 0 if( ierr == MPI_SUCCESS ) { if( LDU == N ) ierr = MPI_Type_contiguous( lengthR * LDU, MPI_DOUBLE, &type[I_RECV] ); else ierr = MPI_Type_vector( lengthR, N, LDU, MPI_DOUBLE, &type[I_RECV] ); } if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[I_RECV] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), 1, type[I_RECV], partner, Cmsgid, comm, &request ); #else /* * In our case, LDU is N - Do not use 
the MPI datatype. */ if( ierr == MPI_SUCCESS ) ierr = MPI_Irecv( Mptr( U, 0, ibufR, LDU ), lengthR*LDU, MPI_DOUBLE, partner, Cmsgid, comm, &request ); #endif } if( lengthS > 0 ) { #if 0 if( ierr == MPI_SUCCESS ) { if( LDU == N ) ierr = MPI_Type_contiguous( lengthS*LDU, MPI_DOUBLE, &type[I_SEND] ); else ierr = MPI_Type_vector( lengthS, N, LDU, MPI_DOUBLE, &type[I_SEND] ); } if( ierr == MPI_SUCCESS ) ierr = MPI_Type_commit( &type[I_SEND] ); if( ierr == MPI_SUCCESS ) ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), 1, type[I_SEND], partner, Cmsgid, comm ); if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[I_SEND] ); #else /* * In our case, LDU is N - Do not use the MPI datatype. */ if( ierr == MPI_SUCCESS ) ierr = MPI_Send( Mptr( U, 0, ibufS, LDU ), lengthS*LDU, MPI_DOUBLE, partner, Cmsgid, comm ); #endif } if( lengthR > 0 ) { if( ierr == MPI_SUCCESS ) ierr = MPI_Wait( &request, &status ); #if 0 if( ierr == MPI_SUCCESS ) ierr = MPI_Type_free( &type[I_RECV] ); #endif } /* * Probe for column panel - forward it when available */ if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG ); } if( ierr != MPI_SUCCESS ) { HPL_pabort( __LINE__, "HPL_rollT", "MPI call failed" ); } /* * End of HPL_rollT */ } hpcc-1.4.1/hpl/src/pgesv/HPL_spreadN.c0000644000000000000000000003105211256503657014275 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. 
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_spreadN
(
   HPL_T_panel *                    PBCST,
   int *                            IFLAG,
   HPL_T_panel *                    PANEL,
   const enum HPL_SIDE              SIDE,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const int                        SRCDIST,
   const int *                      IPLEN,
   const int *                      IPMAP,
   const int *                      IPMAPM1
)
#else
void HPL_spreadN
( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
   HPL_T_panel *                    PBCST;
   int *                            IFLAG;
   HPL_T_panel *                    PANEL;
   const enum HPL_SIDE              SIDE;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const int                        SRCDIST;
   const int *                      IPLEN;
   const int *                      IPMAP;
   const int *                      IPMAPM1;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_spreadN spreads  the local array containing local pieces of U, so
 * that on exit to this function,  a piece of U is contained in every
 * process row. The array IPLEN contains the number of rows of U, that
 * should be spread on any given process row.  This function also probes
 * for the presence of the column panel PBCST.  In case of success, this
 * panel will be forwarded. If PBCST is NULL on input, this probing
 * mechanism will be disabled.
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         On entry,  PBCST  points to the data structure containing the
 *         panel (to be broadcast) information.
 *
 * IFLAG   (local input/output)          int *
 *         On entry, IFLAG  indicates  whether or not  the broadcast has
 *         already been completed.  If not,  probing will occur, and the
 *         outcome will be contained in IFLAG on exit.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel (to be spread) information.
 *
 * SIDE    (global input)                const enum HPL_SIDE
 *         On entry, SIDE specifies whether the local piece of U located
 *         in process IPMAP[SRCDIST] should be spread to the right or to
 *         the left. This feature is used by the equilibration process.
 *
 * N       (global input)                const int
 *         On entry,  N  specifies the local number of columns of U. N
 *         must be at least zero.
 *
 * U       (local input/output)          double *
 *         On entry,  U  is an array of dimension (LDU,*) containing the
 *         local pieces of U.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the local leading dimension of U. LDU
 *         should be at least MAX(1,IPLEN[nprow]).
 *
 * SRCDIST (local input)                 const int
 *         On entry,  SRCDIST  specifies the source process that spreads
 *         its piece of U.
 *
 * IPLEN   (global input)                const int *
 *         On entry, IPLEN is an array of dimension NPROW+1.  This array
 *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
 *         in each process before process IPMAP[i], with the convention
 *         that IPLEN[nprow] is the total number of rows. In other words
 *         IPLEN[i+1] - IPLEN[i] is the local number of rows of U that
 *         should be moved to process IPMAP[i].
 *
 * IPMAP   (global input)                const int *
 *         On entry, IPMAP is an array of dimension  NPROW.  This array
 *         contains  the  logarithmic mapping of the processes. In other
 *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
 *         process.
 *
 * IPMAPM1 (global input)                const int *
 *         On entry,  IPMAPM1  is an array of dimension NPROW. This array
 *         contains  the inverse of the logarithmic mapping contained in
 *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
 *
 * Reading notes (derived from the code below)
 * ===========================================
 *
 * - IPLEN is used as a table of row offsets: an entry gives the first
 *   row of a piece (ibuf) and the difference of two entries gives the
 *   number of rows exchanged (lbuf).
 * - Each exchanged piece is described by MPI_Type_vector( N, lbuf, LDU ),
 *   i.e. N blocks of lbuf doubles separated by LDU doubles, anchored at
 *   Mptr( U, ibuf, 0, LDU ) (presumably the address of row ibuf in the
 *   first column - Mptr is defined elsewhere, confirm there).
 * - The exchange runs over the bits of the distance to the source, from
 *   the highest power of two not exceeding npm1 down to 1.  At a given
 *   bit a process takes part only when all lower bits of its distance
 *   are zero; it receives when the bit is set in its distance and sends
 *   when it is clear and the partner distance is below nprow.
 * - Once an MPI call fails, ierr stays different from MPI_SUCCESS, every
 *   later MPI call in this function is skipped, and HPL_pabort is called
 *   at the end.  The HPL_bcast probe is not guarded by ierr.
 * - NOTE(review): this function only tests *IFLAG before probing; the
 *   "PBCST is NULL" case mentioned above is not handled here, presumably
 *   it is handled by HPL_bcast or by the caller - confirm.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   MPI_Datatype               type;
   MPI_Status                 status;
   MPI_Comm                   comm;
   unsigned int               ip2=1, mask=1, mydist, mydist2;
   int                        Cmsgid=MSGID_BEGIN_PFACT, ibuf,
                              ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
                              npm1, nprow, partner;
/* ..
 * .. Executable Statements ..
 */
   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
   comm  = PANEL->grid->col_comm;
/*
 * Spread U to the left
 */
   if( SIDE == HplLeft )
   {
/*
 * Only the sorted positions 0 .. SRCDIST take part: nprow becomes the
 * number of participants and npm1 the largest distance to the source.
 */
      nprow = ( npm1 = SRCDIST ) + 1;
/*
 * Nothing to do for processes sorted after the source, or when the
 * source is the only participant.
 */
      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
/*
 * ip2 becomes the largest power of two not exceeding npm1, and mask the
 * run of one bits from bit 0 up to and including that bit.
 */
      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
/*
 * From here on mydist is the distance to the source, counted leftwards.
 * NOTE(review): lgth is assigned but never read in this branch.
 */
      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
      lgth    = IPLEN[nprow];

      do
      {
/*
 * Drop the current bit from mask, leaving only the bits below ip2.
 */
         mask ^= ip2;

         if( ( mydist & mask ) == 0 )
         {
/*
 * ibuf is the first row of the piece, lbuf its number of rows.
 */
            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );

            if( lbuf > 0 )
            {
               partner = mydist ^ ip2;

               if( mydist & ip2 )
               {
/*
 * Bit set in my distance: receive the piece from the partner.
 * NOTE(review): if MPI_Recv fails the committed datatype is not freed;
 * the function aborts through HPL_pabort afterwards.
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
                                               &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
                                        IPMAP[npm1-partner], Cmsgid, comm,
                                        &status );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
               }
               else if( partner < nprow )
               {
/*
 * Bit clear in my distance and the partner exists: send the piece.
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
                                               &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
                                        IPMAP[npm1-partner], Cmsgid, comm );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
               }
            }
         }
/*
 * Move to the next lower bit and shift il by the new ip2, in the
 * direction selected by the remaining distance mydist2.
 */
         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
/*
 * Probe for column panel - forward it when available
 */
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );

      } while( ip2 > 0 );
   }
   else
   {
/*
 * Only the sorted positions SRCDIST .. nprow-1 take part: nprow becomes
 * the number of participants and npm1 the largest distance to the source.
 */
      npm1 = ( nprow -= SRCDIST ) - 1;
/*
 * Nothing to do for processes sorted before the source, or when the
 * source is the only participant.
 */
      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
/*
 * Same ip2 / mask set-up as in the left case.
 */
      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
/*
 * mydist becomes the distance to the source, counted rightwards; lgth
 * is the last IPLEN entry used (index SRCDIST plus the participant count).
 */
      mydist2 = ( mydist -= SRCDIST ); il = ip2;
      lgth    = IPLEN[SRCDIST+nprow];
/*
 * Spread U to the right - offset the IPLEN, and IPMAP arrays
 */
      do
      {
         mask ^= ip2;

         if( ( mydist & mask ) == 0 )
         {
/*
 * The piece starts at row IPLEN[SRCDIST+il] and ends where the piece of
 * position il+ip2 starts, or at lgth when that position does not exist.
 */
            k    = il + ip2; ibuf = IPLEN[SRCDIST+il];
            lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;

            if( lbuf > 0 )
            {
               partner = mydist ^ ip2;

               if( mydist & ip2 )
               {
/*
 * Bit set in my distance: receive the piece from the partner.
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
                                               &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, ibuf, 0, LDU ), 1, type,
                                        IPMAP[SRCDIST+partner], Cmsgid,
                                        comm, &status );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
               }
               else if( partner < nprow )
               {
/*
 * Bit clear in my distance and the partner exists: send the piece.
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_vector( N, lbuf, LDU, MPI_DOUBLE,
                                               &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, ibuf, 0, LDU ), 1, type,
                                        IPMAP[SRCDIST+partner], Cmsgid,
                                        comm );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
               }
            }
         }
/*
 * Move to the next lower bit; il moves in the opposite direction to the
 * left case.
 */
         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
/*
 * Probe for column panel - forward it when available
 */
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );

      } while( ip2 > 0 );
   }
/*
 * Any MPI failure recorded above is fatal.
 */
   if( ierr != MPI_SUCCESS )
   { HPL_pabort( __LINE__, "HPL_spreadN", "MPI call failed" ); }
/*
 * End of HPL_spreadN
 */
}
/* NOTE(review): the next line appears to be a tar member header left in
 * the text by archive extraction; it is not C source. */
hpcc-1.4.1/hpl/src/pgesv/HPL_spreadT.c0000644000000000000000000003537411256503657014316 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2.
Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */
/*
 * Include files
 */
#include "hpl.h"

#ifdef HPL_STDC_HEADERS
void HPL_spreadT
(
   HPL_T_panel *                    PBCST,
   int *                            IFLAG,
   HPL_T_panel *                    PANEL,
   const enum HPL_SIDE              SIDE,
   const int                        N,
   double *                         U,
   const int                        LDU,
   const int                        SRCDIST,
   const int *                      IPLEN,
   const int *                      IPMAP,
   const int *                      IPMAPM1
)
#else
void HPL_spreadT
( PBCST, IFLAG, PANEL, SIDE, N, U, LDU, SRCDIST, IPLEN, IPMAP, IPMAPM1 )
   HPL_T_panel *                    PBCST;
   int *                            IFLAG;
   HPL_T_panel *                    PANEL;
   const enum HPL_SIDE              SIDE;
   const int                        N;
   double *                         U;
   const int                        LDU;
   const int                        SRCDIST;
   const int *                      IPLEN;
   const int *                      IPMAP;
   const int *                      IPMAPM1;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_spreadT spreads  the local array containing local pieces of U, so
 * that on exit to this function,  a piece of U is contained in every
 * process row.  The array IPLEN contains the number of columns of U,
 * that should be spread on any given process row.  This function also
 * probes for the presence of the column panel PBCST.  If available,
 * this panel will be forwarded.  If PBCST is NULL on input, this
 * probing mechanism will be disabled.
 *
 * Arguments
 * =========
 *
 * PBCST   (local input/output)          HPL_T_panel *
 *         On entry,  PBCST  points to the data structure containing the
 *         panel (to be broadcast) information.
 *
 * IFLAG   (local input/output)          int *
 *         On entry, IFLAG  indicates  whether or not  the broadcast has
 *         already been completed.  If not,  probing will occur, and the
 *         outcome will be contained in IFLAG on exit.
 *
 * PANEL   (local input/output)          HPL_T_panel *
 *         On entry,  PANEL  points to the data structure containing the
 *         panel (to be spread) information.
 *
 * SIDE    (global input)                const enum HPL_SIDE
 *         On entry, SIDE specifies whether the local piece of U located
 *         in process IPMAP[SRCDIST] should be spread to the right or to
 *         the left. This feature is used by the equilibration process.
 *
 * N       (global input)                const int
 *         On entry,  N  specifies the local number of rows of U. N must
 *         be at least zero.
 *
 * U       (local input/output)          double *
 *         On entry,  U  is an array of dimension (LDU,*) containing the
 *         local pieces of U.
 *
 * LDU     (local input)                 const int
 *         On entry, LDU specifies the local leading dimension of U. LDU
 *         should be at least MAX(1,N).
 *
 * SRCDIST (local input)                 const int
 *         On entry,  SRCDIST  specifies the source process that spreads
 *         its piece of U.
 *
 * IPLEN   (global input)                const int *
 *         On entry, IPLEN is an array of dimension NPROW+1.  This array
 *         is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
 *         in each process before process IPMAP[i], with the convention
 *         that IPLEN[nprow] is the total number of rows. In other words
 *         IPLEN[i+1] - IPLEN[i] is the local number of rows of U that
 *         should be moved to process IPMAP[i].
 *
 * IPMAP   (global input)                const int *
 *         On entry, IPMAP is an array of dimension  NPROW.  This array
 *         contains  the  logarithmic mapping of the processes. In other
 *         words, IPMAP[myrow]  is the absolute coordinate of the sorted
 *         process.
 *
 * IPMAPM1 (global input)                const int *
 *         On entry,  IPMAPM1  is an array of dimension NPROW. This array
 *         contains  the inverse of the logarithmic mapping contained in
 *         IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
 *
 * Reading notes (derived from the code below)
 * ===========================================
 *
 * - In this variant an IPLEN entry is passed to Mptr as the second index
 *   ( Mptr( U, 0, ibuf, LDU ) ), so the IPLEN "rows" described above are
 *   counted along the second dimension of the local array here
 *   (presumably its columns - Mptr is defined elsewhere, confirm there).
 * - The compiled path sends and receives lbuf*N contiguous doubles.  It
 *   matches the disabled datatype path only when LDU == N.
 *   NOTE(review): nothing in this function checks that LDU == N.
 * - The exchange runs over the bits of the distance to the source, from
 *   the highest power of two not exceeding npm1 down to 1.  At a given
 *   bit a process takes part only when all lower bits of its distance
 *   are zero; it receives when the bit is set in its distance and sends
 *   when it is clear and the partner distance is below nprow.
 * - Once an MPI call fails, ierr stays different from MPI_SUCCESS, every
 *   later MPI call in this function is skipped, and HPL_pabort is called
 *   at the end.  The HPL_bcast probe is not guarded by ierr.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
#if 0
   MPI_Datatype               type;
#endif
   MPI_Status                 status;
   MPI_Comm                   comm;
   unsigned int               ip2=1, mask=1, mydist, mydist2;
   int                        Cmsgid=MSGID_BEGIN_PFACT, ibuf,
                              ierr=MPI_SUCCESS, il, k, lbuf, lgth, myrow,
                              npm1, nprow, partner;
/* ..
 * .. Executable Statements ..
 */
   myrow = PANEL->grid->myrow;    nprow = PANEL->grid->nprow;
   comm  = PANEL->grid->col_comm;
/*
 * Spread U
 */
   if( SIDE == HplLeft )
   {
/*
 * Only the sorted positions 0 .. SRCDIST take part: nprow becomes the
 * number of participants and npm1 the largest distance to the source.
 */
      nprow = ( npm1 = SRCDIST ) + 1;
/*
 * Nothing to do for processes sorted after the source, or when the
 * source is the only participant.
 */
      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) >
              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
/*
 * ip2 becomes the largest power of two not exceeding npm1, and mask the
 * run of one bits from bit 0 up to and including that bit.
 */
      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
/*
 * From here on mydist is the distance to the source, counted leftwards.
 * NOTE(review): lgth is assigned but never read in this branch.
 */
      mydist2 = ( mydist = npm1 - mydist ); il = npm1 - ip2;
      lgth    = IPLEN[nprow];

      do
      {
/*
 * Drop the current bit from mask, leaving only the bits below ip2.
 */
         mask ^= ip2;

         if( ( mydist & mask ) == 0 )
         {
/*
 * ibuf is the offset of the piece, lbuf its extent.
 */
            lbuf = IPLEN[il+1] - ( ibuf = IPLEN[il-Mmin(il, (int)(ip2))] );

            if( lbuf > 0 )
            {
               partner = mydist ^ ip2;

               if( mydist & ip2 )
               {
#if 0
                  if( ierr == MPI_SUCCESS )
                  {
                     if( LDU == N )
                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
                                                    &type );
                     else
                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
                                                &type );
                  }
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
                                        IPMAP[npm1-partner], Cmsgid, comm,
                                        &status );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
#else
/*
 * In our case, LDU is N - do not use the MPI Datatypes
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
                                        MPI_DOUBLE, IPMAP[npm1-partner],
                                        Cmsgid, comm, &status );
#endif
               }
               else if( partner < nprow )
               {
#if 0
                  if( ierr == MPI_SUCCESS )
                  {
                     if( LDU == N )
                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
                                                    &type );
                     else
                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
                                                &type );
                  }
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
                                        IPMAP[npm1-partner], Cmsgid, comm );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
#else
/*
 * In our case, LDU is N - do not use the MPI Datatypes
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
                                        MPI_DOUBLE, IPMAP[npm1-partner],
                                        Cmsgid, comm );
#endif
               }
            }
         }
/*
 * Move to the next lower bit and shift il by the new ip2, in the
 * direction selected by the remaining distance mydist2.
 */
         if( mydist2 < ip2 ) {  ip2 >>= 1; il += ip2; }
         else { mydist2 -= ip2; ip2 >>= 1; il -= ip2; }
/*
 * Probe for column panel - forward it when available
 */
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );

      } while( ip2 > 0 );
   }
   else
   {
/*
 * Only the sorted positions SRCDIST .. nprow-1 take part: nprow becomes
 * the number of participants and npm1 the largest distance to the source.
 */
      npm1 = ( nprow -= SRCDIST ) - 1;
/*
 * Nothing to do for processes sorted before the source, or when the
 * source is the only participant.
 */
      if( ( ( mydist = (unsigned int)(IPMAPM1[myrow]) ) <
              (unsigned int)(SRCDIST) ) || ( npm1 == 0 ) ) return;
/*
 * Same ip2 / mask set-up as in the left case.
 */
      k = npm1; while( k > 1 ) { k >>= 1; ip2 <<= 1; mask <<= 1; mask++; }
/*
 * mydist becomes the distance to the source, counted rightwards.
 */
      mydist2 = ( mydist -= SRCDIST ); il = ip2;
/*
 * Spread to the right - offset the IPLEN and IPMAP arrays
 */
      lgth = IPLEN[SRCDIST+nprow];
/*
 * Spread U
 */
      do
      {
         mask ^= ip2;

         if( ( mydist & mask ) == 0 )
         {
/*
 * The piece starts at IPLEN[SRCDIST+il] and ends where the piece of
 * position il+ip2 starts, or at lgth when that position does not exist.
 */
            k    = il + ip2; ibuf = IPLEN[SRCDIST+il];
            lbuf = ( k >= nprow ? lgth : IPLEN[SRCDIST+k] ) - ibuf;

            if( lbuf > 0 )
            {
               partner = mydist ^ ip2;

               if( mydist & ip2 )
               {
#if 0
                  if( ierr == MPI_SUCCESS )
                  {
                     if( LDU == N )
                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
                                                    &type );
                     else
                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
                                                &type );
                  }
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), 1, type,
                                        IPMAP[SRCDIST+partner], Cmsgid,
                                        comm, &status );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
#else
/*
 * In our case, LDU is N - do not use the MPI Datatypes
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Recv( Mptr( U, 0, ibuf, LDU ), lbuf*N,
                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
                                        Cmsgid, comm, &status );
#endif
               }
               else if( partner < nprow )
               {
#if 0
                  if( ierr == MPI_SUCCESS )
                  {
                     if( LDU == N )
                        ierr = MPI_Type_contiguous( lbuf*LDU, MPI_DOUBLE,
                                                    &type );
                     else
                        ierr = MPI_Type_vector( lbuf, N, LDU, MPI_DOUBLE,
                                                &type );
                  }
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_commit( &type );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), 1, type,
                                        IPMAP[SRCDIST+partner], Cmsgid,
                                        comm );
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Type_free( &type );
#else
/*
 * In our case, LDU is N - do not use the MPI Datatypes
 */
                  if( ierr == MPI_SUCCESS )
                     ierr =   MPI_Send( Mptr( U, 0, ibuf, LDU ), lbuf*N,
                                        MPI_DOUBLE, IPMAP[SRCDIST+partner],
                                        Cmsgid, comm );
#endif
               }
            }
         }
/*
 * Move to the next lower bit; il moves in the opposite direction to the
 * left case.
 */
         if( mydist2 < ip2 ) {  ip2 >>= 1; il -= ip2; }
         else { mydist2 -= ip2; ip2 >>= 1; il += ip2; }
/*
 * Probe for column panel - forward it when available
 */
         if( *IFLAG == HPL_KEEP_TESTING ) (void) HPL_bcast( PBCST, IFLAG );

      } while( ip2 > 0 );
   }
/*
 * Any MPI failure recorded above is fatal.
 */
   if( ierr != MPI_SUCCESS )
   { HPL_pabort( __LINE__, "HPL_spreadT", "MPI call failed" ); }
/*
 * End of HPL_spreadT
 */
}
/* NOTE(review): the next line appears to be a tar member header left in
 * the text by archive extraction; it is not C source. */
hpcc-1.4.1/hpl/testing/matgen/HPL_dmatgen.c0000644000000000000000000001322411256503657015336 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4. The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS  PROVIDED  BY  THE COPYRIGHT  HOLDERS  AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_dmatgen ( const int M, const int N, double * A, const int LDA, const int ISEED ) #else void HPL_dmatgen ( M, N, A, LDA, ISEED ) const int M; const int N; double * A; const int LDA; const int ISEED; #endif { /* * Purpose * ======= * * HPL_dmatgen generates (or regenerates) a random matrix A. * * The pseudo-random generator uses the linear congruential algorithm: * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer * Programming, Knuth 1973, Vol. 2. * * Arguments * ========= * * M (input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (input) const int * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. * * A (output) double * * On entry, A points to an array of dimension (LDA,N). On exit, * this array contains the coefficients of the randomly * generated matrix. * * LDA (input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max(1,M). * * ISEED (input) const int * On entry, ISEED specifies the seed number to generate the * matrix A. ISEED must be at least zero. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int iadd[2], ia1[2], ic1[2], iran1[2], jseed[2], mult[2]; int i, incA = LDA - M, j; /* .. 
* .. Executable Statements .. */ if( ( M <= 0 ) || ( N <= 0 ) ) return; /* * Initialize the random sequence */ mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; jseed[0] = ISEED; jseed[1] = 0; HPL_xjumpm( 1, mult, iadd, jseed, iran1, ia1, ic1 ); HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); /* * Generate an M by N matrix */ for( j = 0; j < N; A += incA, j++ ) for( i = 0; i < M; A++, i++ ) *A = HPL_rand(); /* * End of HPL_dmatgen */ } hpcc-1.4.1/hpl/testing/matgen/HPL_jumpit.c0000644000000000000000000001225511256503657015232 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_jumpit ( int * MULT, int * IADD, int * IRANN, int * IRANM ) #else void HPL_jumpit ( MULT, IADD, IRANN, IRANM ) int * MULT; int * IADD; int * IRANN; int * IRANM; #endif { /* * Purpose * ======= * * HPL_jumpit jumps in the random sequence from the number X(n) encoded * in IRANN to the number X(m) encoded in IRANM using the constants A * and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A * and C obviously depend on m and n, see the function HPL_xjumpm in * order to initialize them. * * Arguments * ========= * * MULT (local input) int * * On entry, MULT is an array of dimension 2, that contains the * 16-lower and 15-higher bits of the constant A. * * IADD (local input) int * * On entry, IADD is an array of dimension 2, that contains the * 16-lower and 15-higher bits of the constant C. * * IRANN (local input) int * * On entry, IRANN is an array of dimension 2, that contains * the 16-lower and 15-higher bits of the encoding of X(n). * * IRANM (local output) int * * On entry, IRANM is an array of dimension 2. 
On exit, this * array contains respectively the 16-lower and 15-higher bits * of the encoding of X(m). * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int j[2]; /* .. * .. Executable Statements .. */ HPL_lmul( IRANN, MULT, j ); /* j = IRANN * MULT; */ HPL_ladd( j, IADD, IRANM ); /* IRANM = j + IADD; */ HPL_setran( 0, IRANM ); /* irand = IRANM */ /* * End of HPL_jumpit */ } hpcc-1.4.1/hpl/testing/matgen/HPL_ladd.c0000644000000000000000000001255711256503657014633 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
/*
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS  PROVIDED  BY  THE COPYRIGHT  HOLDERS  AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 *
 * HPL_ladd needs nothing from hpl.h apart from its own prototype, so
 * the header is included whenever the compiler can find it; compilers
 * that cannot test for its presence include it unconditionally, as
 * before.  This keeps the routine buildable on its own.
 */
#if defined( __has_include )
#  if __has_include("hpl.h")
#    include "hpl.h"
#  endif
#else
#  include "hpl.h"
#endif

/*
 * The prototype form is selected by HPL_STDC_HEADERS, the switch used
 * by the other HPL routines; STDC_HEADERS is still honoured so that
 * existing build settings keep working.
 */
#if defined( HPL_STDC_HEADERS ) || defined( STDC_HEADERS )
void HPL_ladd
(
   int *                            J,
   int *                            K,
   int *                            I
)
#else
void HPL_ladd
( J, K, I )
   int *                            J;
   int *                            K;
   int *                            I;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_ladd adds the two long integers J and K and puts the result into
 * I.  Each long integer is encoded on 64 bits using an array of two
 * ints:  the 32 lower bits are stored in the first entry,  the 32
 * higher bits in the second entry.  The sum is formed 16 bits at a
 * time, with the carry of each 16-bit limb passed on to the next one;
 * the carry out of the highest limb is dropped, so the result is the
 * sum modulo 2^64.
 *
 * J and K are read completely before I is written, so I may be the same
 * array as J or K.
 *
 * Arguments
 * =========
 *
 * J       (local input)                 int *
 *         Array of dimension 2 containing the encoded long integer J.
 *
 * K       (local input)                 int *
 *         Array of dimension 2 containing the encoded long integer K.
 *
 * I       (local output)                int *
 *         Array of dimension 2.  On exit, it contains the encoded sum.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   unsigned int               jlimb[4], klimb[4], sum[4];
   unsigned int               carry = 0, t;
   int                        n;
/* ..
 * .. Executable Statements ..
 */
/*
 * Split both operands into four 16-bit limbs, least significant first.
 * The high halves are masked as in HPL_lmul, so that only 32 bits of
 * each entry are ever used.
 */
   for( n = 0; n < 2; n++ )
   {
      jlimb[2*n  ] = J[n] & 65535;
      jlimb[2*n+1] = ( (unsigned)J[n] >> 16 ) & 65535;
      klimb[2*n  ] = K[n] & 65535;
      klimb[2*n+1] = ( (unsigned)K[n] >> 16 ) & 65535;
   }
/*
 * Add limb by limb:  each partial sum is at most 2 * 65535 + 1, its low
 * 16 bits are the result limb and bit 16 is the carry into the next one.
 */
   for( n = 0; n < 4; n++ )
   {
      t      = jlimb[n] + klimb[n] + carry;
      sum[n] = t & 65535;
      carry  = t >> 16;
   }
/*
 * Reassemble the two 32-bit halves; the final carry is discarded.
 */
   I[0] = (int)( ( sum[1] << 16 ) | sum[0] );
   I[1] = (int)( ( sum[3] << 16 ) | sum[2] );
/*
 * End of HPL_ladd
 */
}
/* hpcc-1.4.1/hpl/testing/matgen/HPL_lmul.c0000644000000000000000000001263511256503657014675 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3. All  advertising  materials  mentioning  features  or  use of this
 * software must display the following acknowledgement:
 * This  product  includes  software  developed  at  the  University  of
 * Tennessee, Knoxville, Innovative Computing Laboratory.
 *
 * 4.
 */
/* The name of the  University,  the name of the  Laboratory,  or the
 * names  of  its  contributors  may  not  be used to endorse or promote
 * products  derived   from   this  software  without  specific  written
 * permission.
 *
 * -- Disclaimer:
 *
 * THIS  SOFTWARE  IS  PROVIDED  BY  THE COPYRIGHT  HOLDERS  AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES,  INCLUDING,  BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY
 * OR  CONTRIBUTORS  BE  LIABLE FOR ANY  DIRECT,  INDIRECT,  INCIDENTAL,
 * SPECIAL,  EXEMPLARY,  OR  CONSEQUENTIAL DAMAGES  (INCLUDING,  BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA OR PROFITS; OR BUSINESS INTERRUPTION)  HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT,  STRICT LIABILITY,  OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 *
 * HPL_lmul needs nothing from hpl.h apart from its own prototype, so
 * the header is included whenever the compiler can find it; compilers
 * that cannot test for its presence include it unconditionally, as
 * before.  This keeps the routine buildable on its own.
 */
#if defined( __has_include )
#  if __has_include("hpl.h")
#    include "hpl.h"
#  endif
#else
#  include "hpl.h"
#endif

/*
 * The prototype form is selected by HPL_STDC_HEADERS, the switch used
 * by the other HPL routines; STDC_HEADERS is still honoured so that
 * existing build settings keep working.
 */
#if defined( HPL_STDC_HEADERS ) || defined( STDC_HEADERS )
void HPL_lmul
(
   int *                            K,
   int *                            J,
   int *                            I
)
#else
void HPL_lmul
( K, J, I )
   int *                            K;
   int *                            J;
   int *                            I;
#endif
{
/*
 * Purpose
 * =======
 *
 * HPL_lmul multiplies the two long integers K and J and puts the low 64
 * bits of the product into I.  Each long integer is encoded on 64 bits
 * using an array of two ints:  the 32 lower bits are stored in the
 * first entry,  the 32 higher bits in the second entry.  The product
 * is formed by schoolbook multiplication on 16-bit limbs; whatever
 * would land above bit 63 is dropped, so the result is the product
 * modulo 2^64.
 *
 * K and J are read completely before I is written, so I may be the same
 * array as K or J.
 *
 * Arguments
 * =========
 *
 * K       (local input)                 int *
 *         Array of dimension 2 containing the encoded long integer K.
 *
 * J       (local input)                 int *
 *         Array of dimension 2 containing the encoded long integer J.
 *
 * I       (local output)                int *
 *         Array of dimension 2.  On exit, it contains the encoded
 *         product.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   unsigned int               kk[4], jj[4], digit[4];
   unsigned int               low, carry;
   int                        r, c;
/* ..
 * .. Executable Statements ..
 */
/*
 * Split both operands into four 16-bit limbs, least significant first.
 */
   for( c = 0; c < 2; ++c )
   {
      kk[2*c  ] = K[c] & 65535;
      kk[2*c+1] = ( (unsigned)K[c] >> 16 ) & 65535;
      jj[2*c  ] = J[c] & 65535;
      jj[2*c+1] = ( (unsigned)J[c] >> 16 ) & 65535;
   }
/*
 * Column c of the product collects every kk[r] * jj[c-r].  Multiplying
 * two 16-bit limbs and adding a 16-bit value never exceeds 32 bits:
 * the low 16 bits stay in the column, the high 16 bits are accumulated
 * in carry for the next column.  Columns 4 and above are not computed.
 */
   carry = 0;
   for( c = 0; c < 4; ++c )
   {
      low     = carry & 65535;
      carry >>= 16;
      for( r = 0; r <= c; ++r )
      {
         low   += kk[r] * jj[c-r];
         carry += low >> 16;
         low   &= 65535;
      }
      digit[c] = low;
   }
/*
 * Reassemble the two 32-bit halves.
 */
   I[0] = (int)( ( digit[1] << 16 ) | digit[0] );
   I[1] = (int)( ( digit[3] << 16 ) | digit[2] );
/*
 * End of HPL_lmul
 */
}
/* hpcc-1.4.1/hpl/testing/matgen/HPL_rand.c0000644000000000000000000001103411256503657014640 00000000000000/*
 * -- High Performance Computing Linpack Benchmark (HPL)
 *    HPL - 2.0 - September 10, 2008
 *    Antoine P. Petitet
 *    University of Tennessee, Knoxville
 *    Innovative Computing Laboratory
 *    (C) Copyright 2000-2008 All Rights Reserved
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1. Redistributions  of  source  code  must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce  the above copyright
 * notice, this list of conditions,  and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * 3.
 */
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef STDC_HEADERS double HPL_rand( void ) #else double HPL_rand() #endif { /* * Purpose * ======= * * HPL_rand generates the next number in the random sequence. This * function ensures that this number lies in the interval (-0.5, 0.5]. * * The static array irand contains the information (2 integers) required * to generate the next number in the sequence X(n). This number is * computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the * constant d is the largest 64 bit positive unsigned integer. 
The array * irand is then updated for the generation of the next number X(n+1) * in the random sequence as follows X(n+1) = a * X(n) + c. The * constants a and c should have been preliminarily stored in the arrays * ias and ics as 2 pairs of integers. The initialization of ias, ics * and irand is performed by the function HPL_setran. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int j[2]; /* .. * .. Executable Statements .. */ HPL_setran( 3, j ); /* * return number between -0.5 and 0.5 */ return( HPL_HALF - (((j[0] & 65535) + ((unsigned)j[0] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF + (j[1] & 65535) + ((unsigned)j[1] >> 16) * HPL_POW16) / HPL_DIVFAC * HPL_HALF ); /* * End of HPL_rand */ } hpcc-1.4.1/hpl/testing/matgen/HPL_setran.c0000644000000000000000000001240411256503657015212 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * --------------------------------------------------------------------- * Static variables * --------------------------------------------------------------------- */ static int ias[2], ics[2], irand[2]; #ifdef HPL_STDC_HEADERS void HPL_setran ( const int OPTION, int * IRAN ) #else void HPL_setran ( OPTION, IRAN ) const int OPTION; int * IRAN; #endif { /* * Purpose * ======= * * HPL_setran initializes the random generator with the encoding of the * first number X(0) in the sequence, and the constants a and c used to * compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), * a and c are stored in the static variables irand, ias and ics. When * OPTION is 0 (resp. 1 and 2), irand (resp. ia and ic) is set to the * values of the input array IRAN. When OPTION is 3, IRAN is set to the * current value of irand, and irand is then incremented. 
* * Arguments * ========= * * OPTION (local input) const int * On entry, OPTION is an integer that specifies the operations * to be performed on the random generator as specified above. * * IRAN (local input/output) int * * On entry, IRAN is an array of dimension 2, that contains the * 16-lower and 15-higher bits of a random number. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int j[2]; /* .. * .. Executable Statements .. */ if( OPTION == 3 ) { /* return current value */ IRAN[0] = irand[0]; IRAN[1] = irand[1]; HPL_lmul( irand, ias, j ); /* j = irand * ias; */ HPL_ladd( j, ics, irand ); /* irand = j + ics; */ } else if( OPTION == 0 ) { irand[0] = IRAN[0]; irand[1] = IRAN[1]; } else if( OPTION == 1 ) { ias [0] = IRAN[0]; ias [1] = IRAN[1]; } else if( OPTION == 2 ) { ics [0] = IRAN[0]; ics [1] = IRAN[1]; } /* * End of HPL_setran */ } hpcc-1.4.1/hpl/testing/matgen/HPL_xjumpm.c0000644000000000000000000001645611256503657015251 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. 
All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_xjumpm ( const int JUMPM, int * MULT, int * IADD, int * IRANN, int * IRANM, int * IAM, int * ICM ) #else void HPL_xjumpm ( JUMPM, MULT, IADD, IRANN, IRANM, IAM, ICM ) const int JUMPM; int * MULT; int * IADD; int * IRANN; int * IRANM; int * IAM; int * ICM; #endif { /* * Purpose * ======= * * HPL_xjumpm computes the constants A and C to jump JUMPM numbers in * the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in * MULT and IADD specify how to jump from one entry in the sequence to * the next. 
* * Arguments * ========= * * JUMPM (local input) const int * On entry, JUMPM specifies the number of entries in the * sequence to jump over. When JUMPM is less or equal than zero, * A and C are not computed, IRANM is set to IRANN corresponding * to a jump of size zero. * * MULT (local input) int * * On entry, MULT is an array of dimension 2, that contains the * 16-lower and 15-higher bits of the constant a to jump from * X(n) to X(n+1) = a*X(n) + c in the random sequence. * * IADD (local input) int * * On entry, IADD is an array of dimension 2, that contains the * 16-lower and 15-higher bits of the constant c to jump from * X(n) to X(n+1) = a*X(n) + c in the random sequence. * * IRANN (local input) int * * On entry, IRANN is an array of dimension 2. that contains the * 16-lower and 15-higher bits of the encoding of X(n). * * IRANM (local output) int * * On entry, IRANM is an array of dimension 2. On exit, this * array contains respectively the 16-lower and 15-higher bits * of the encoding of X(n+JUMPM). * * IAM (local output) int * * On entry, IAM is an array of dimension 2. On exit, when JUMPM * is greater than zero, this array contains the encoded * constant A to jump from X(n) to X(n+JUMPM) in the random * sequence. IAM(0:1) contains respectively the 16-lower and * 15-higher bits of this constant A. When JUMPM is less or * equal than zero, this array is not referenced. * * ICM (local output) int * * On entry, ICM is an array of dimension 2. On exit, when JUMPM * is greater than zero, this array contains the encoded * constant C to jump from X(n) to X(n+JUMPM) in the random * sequence. ICM(0:1) contains respectively the 16-lower and * 15-higher bits of this constant C. When JUMPM is less or * equal than zero, this array is not referenced. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int j[2], k; /* .. * .. Executable Statements .. 
*/ if( JUMPM > 0 ) { IAM[0] = MULT[0]; IAM[1] = MULT[1]; /* IAM = MULT; */ ICM[0] = IADD[0]; ICM[1] = IADD[1]; /* ICM = IADD; */ for( k = 1; k <= JUMPM-1; k++ ) { HPL_lmul( IAM, MULT, j ); /* j = IAM * MULT; */ IAM[0] = j[0]; IAM[1] = j[1]; /* IAM = j; */ HPL_lmul( ICM, MULT, j ); /* j = ICM * MULT; */ HPL_ladd( IADD, j, ICM ); /* ICM = IADD + j; */ } HPL_lmul( IRANN, IAM, j ); /* j = IRANN * IAM; */ HPL_ladd( j, ICM, IRANM ); /* IRANM = j + ICM; */ } else { /* IRANM = IRANN */ IRANM[0] = IRANN[0]; IRANM[1] = IRANN[1]; } /* * End of HPL_xjumpm */ } hpcc-1.4.1/hpl/testing/pmatgen/HPL_pdmatgen.c0000644000000000000000000002065111256503657015700 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS void HPL_pdmatgen ( const HPL_T_grid * GRID, const int M, const int N, const int NB, double * A, const int LDA, const int ISEED ) #else void HPL_pdmatgen ( GRID, M, N, NB, A, LDA, ISEED ) const HPL_T_grid * GRID; const int M; const int N; const int NB; double * A; const int LDA; const int ISEED; #endif { /* * Purpose * ======= * * HPL_pdmatgen generates (or regenerates) a parallel random matrix A. * * The pseudo-random generator uses the linear congruential algorithm: * X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer * Programming, Knuth 1973, Vol. 2. * * Arguments * ========= * * GRID (local input) const HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * M (global input) const int * On entry, M specifies the number of rows of the matrix A. * M must be at least zero. * * N (global input) const int * On entry, N specifies the number of columns of the matrix A. * N must be at least zero. 
* * NB (global input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix A. NB must be larger than one. * * A (local output) double * * On entry, A points to an array of dimension (LDA,LocQ(N)). * On exit, this array contains the coefficients of the randomly * generated matrix. * * LDA (local input) const int * On entry, LDA specifies the leading dimension of the array A. * LDA must be at least max(1,LocP(M)). * * ISEED (global input) const int * On entry, ISEED specifies the seed number to generate the * matrix A. ISEED must be at least zero. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ int iadd [2], ia1 [2], ia2 [2], ia3 [2], ia4 [2], ia5 [2], ib1 [2], ib2 [2], ib3 [2], ic1 [2], ic2 [2], ic3 [2], ic4 [2], ic5 [2], iran1[2], iran2[2], iran3[2], iran4[2], itmp1[2], itmp2[2], itmp3[2], jseed[2], mult [2]; int ib, iblk, ik, jb, jblk, jk, jump1, jump2, jump3, jump4, jump5, jump6, jump7, lmb, lnb, mblks, mp, mycol, myrow, nblks, npcol, nprow, nq; /* .. * .. Executable Statements .. 
*/ (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); mult [0] = HPL_MULT0; mult [1] = HPL_MULT1; iadd [0] = HPL_IADD0; iadd [1] = HPL_IADD1; jseed[0] = ISEED; jseed[1] = 0; /* * Generate an M by N matrix starting in process (0,0) */ Mnumroc( mp, M, NB, NB, myrow, 0, nprow ); Mnumroc( nq, N, NB, NB, mycol, 0, npcol ); if( ( mp <= 0 ) || ( nq <= 0 ) ) return; /* * Local number of blocks and size of the last one */ mblks = ( mp + NB - 1 ) / NB; lmb = mp - ( ( mp - 1 ) / NB ) * NB; nblks = ( nq + NB - 1 ) / NB; lnb = nq - ( ( nq - 1 ) / NB ) * NB; /* * Compute multiplier/adder for various jumps in random sequence */ jump1 = 1; jump2 = nprow * NB; jump3 = M; jump4 = npcol * NB; jump5 = NB; jump6 = mycol; jump7 = myrow * NB; HPL_xjumpm( jump1, mult, iadd, jseed, iran1, ia1, ic1 ); HPL_xjumpm( jump2, mult, iadd, iran1, itmp1, ia2, ic2 ); HPL_xjumpm( jump3, mult, iadd, iran1, itmp1, ia3, ic3 ); HPL_xjumpm( jump4, ia3, ic3, iran1, itmp1, ia4, ic4 ); HPL_xjumpm( jump5, ia3, ic3, iran1, itmp1, ia5, ic5 ); HPL_xjumpm( jump6, ia5, ic5, iran1, itmp3, itmp1, itmp2 ); HPL_xjumpm( jump7, mult, iadd, itmp3, iran1, itmp1, itmp2 ); HPL_setran( 0, iran1 ); HPL_setran( 1, ia1 ); HPL_setran( 2, ic1 ); /* * Save value of first number in sequence */ ib1[0] = iran1[0]; ib1[1] = iran1[1]; ib2[0] = iran1[0]; ib2[1] = iran1[1]; ib3[0] = iran1[0]; ib3[1] = iran1[1]; for( jblk = 0; jblk < nblks; jblk++ ) { jb = ( jblk == nblks - 1 ? lnb : NB ); for( jk = 0; jk < jb; jk++ ) { for( iblk = 0; iblk < mblks; iblk++ ) { ib = ( iblk == mblks - 1 ? 
lmb : NB ); for( ik = 0; ik < ib; A++, ik++ ) *A = HPL_rand(); HPL_jumpit( ia2, ic2, ib1, iran2 ); ib1[0] = iran2[0]; ib1[1] = iran2[1]; } A += LDA - mp; HPL_jumpit( ia3, ic3, ib2, iran3 ); ib1[0] = iran3[0]; ib1[1] = iran3[1]; ib2[0] = iran3[0]; ib2[1] = iran3[1]; } HPL_jumpit( ia4, ic4, ib3, iran4 ); ib1[0] = iran4[0]; ib1[1] = iran4[1]; ib2[0] = iran4[0]; ib2[1] = iran4[1]; ib3[0] = iran4[0]; ib3[1] = iran4[1]; } /* * End of HPL_pdmatgen */ } hpcc-1.4.1/hpl/testing/ptest/HPL_pddriver.c0000644000000000000000000003030711256503657015423 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #ifdef HPL_STDC_HEADERS int HPL_main ( int ARGC, char * * ARGV, HPL_RuntimeData * rdata, int *failure ) #else int HPL_main( ARGC, ARGV, rdata, failure ) /* * .. Scalar Arguments .. */ int ARGC; /* * .. Array Arguments .. */ char * * ARGV; HPL_RuntimeData * rdata; int *failure; #endif { /* * Purpose * ======= * * main is the main driver program for testing the HPL routines. * This program is driven by a short data file named "HPL.dat". * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ int nval [HPL_MAX_PARAM], nbval [HPL_MAX_PARAM], pval [HPL_MAX_PARAM], qval [HPL_MAX_PARAM], nbmval[HPL_MAX_PARAM], ndvval[HPL_MAX_PARAM], ndhval[HPL_MAX_PARAM]; HPL_T_FACT pfaval[HPL_MAX_PARAM], rfaval[HPL_MAX_PARAM]; HPL_T_TOP topval[HPL_MAX_PARAM]; HPL_T_grid grid; HPL_T_palg algo; HPL_T_test test; int L1notran, Unotran, align, equil, in, inb, inbm, indh, indv, ipfa, ipq, irfa, itop, mycol, myrow, ns, nbs, nbms, ndhs, ndvs, npcol, npfs, npqs, nprow, nrfs, ntps, rank, size, tswap; HPL_T_ORDER pmapping; HPL_T_FACT rpfa; HPL_T_SWAP fswap; HPL_RuntimeData rdataCur; /* .. * .. Executable Statements .. */ /* MPI_Init( &ARGC, &ARGV ); */ #ifdef HPL_CALL_VSIPL vsip_init((void*)0); #endif MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); /* * Read and check validity of test parameters from input file * * HPL Version 1.0, Linpack benchmark input file * Your message here * HPL.out output file name (if any) * 6 device out (6=stdout,7=stderr,file) * 4 # of problems sizes (N) * 29 30 34 35 Ns * 4 # of NBs * 1 2 3 4 NBs * 0 PMAP process mapping (0=Row-,1=Column-major) * 3 # of process grids (P x Q) * 2 1 4 Ps * 2 4 1 Qs * 16.0 threshold * 3 # of panel fact * 0 1 2 PFACTs (0=left, 1=Crout, 2=Right) * 2 # of recursive stopping criterium * 2 4 NBMINs (>= 1) * 1 # of panels in recursion * 2 NDIVs * 3 # of recursive panel fact. 
* 0 1 2 RFACTs (0=left, 1=Crout, 2=Right) * 1 # of broadcast * 0 BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM) * 1 # of lookahead depth * 0 DEPTHs (>=0) * 2 SWAP (0=bin-exch,1=long,2=mix) * 4 swapping threshold * 0 L1 in (0=transposed,1=no-transposed) form * 0 U in (0=transposed,1=no-transposed) form * 1 Equilibration (0=no,1=yes) * 8 memory alignment in double (> 0) */ HPL_pdinfo( &test, &ns, nval, &nbs, nbval, &pmapping, &npqs, pval, qval, &npfs, pfaval, &nbms, nbmval, &ndvs, ndvval, &nrfs, rfaval, &ntps, topval, &ndhs, ndhval, &fswap, &tswap, &L1notran, &Unotran, &equil, &align ); /* * Loop over different process grids - Define process grid. Go to bottom * of process grid loop if this case does not use my process. */ for( ipq = 0; ipq < npqs; ipq++ ) { (void) HPL_grid_init( MPI_COMM_WORLD, pmapping, pval[ipq], qval[ipq], &grid ); (void) HPL_grid_info( &grid, &nprow, &npcol, &myrow, &mycol ); if( ( myrow < 0 ) || ( myrow >= nprow ) || ( mycol < 0 ) || ( mycol >= npcol ) ) goto label_end_of_npqs; for( in = 0; in < ns; in++ ) { /* Loop over various problem sizes */ for( inb = 0; inb < nbs; inb++ ) { /* Loop over various blocking factors */ for( indh = 0; indh < ndhs; indh++ ) { /* Loop over various lookahead depths */ for( itop = 0; itop < ntps; itop++ ) { /* Loop over various broadcast topologies */ for( irfa = 0; irfa < nrfs; irfa++ ) { /* Loop over various recursive factorizations */ for( ipfa = 0; ipfa < npfs; ipfa++ ) { /* Loop over various panel factorizations */ for( inbm = 0; inbm < nbms; inbm++ ) { /* Loop over various recursive stopping criteria */ for( indv = 0; indv < ndvs; indv++ ) { /* Loop over various # of panels in recursion */ /* * Set up the algorithm parameters */ algo.btopo = topval[itop]; algo.depth = ndhval[indh]; algo.nbmin = nbmval[inbm]; algo.nbdiv = ndvval[indv]; algo.pfact = rpfa = pfaval[ipfa]; if( L1notran != 0 ) { if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllN; else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrN; else 
algo.pffun = HPL_pdpanrlN; algo.rfact = rpfa = rfaval[irfa]; if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllN; else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrN; else algo.rffun = HPL_pdrpanrlN; if( Unotran != 0 ) algo.upfun = HPL_pdupdateNN; else algo.upfun = HPL_pdupdateNT; } else { if( rpfa == HPL_LEFT_LOOKING ) algo.pffun = HPL_pdpanllT; else if( rpfa == HPL_CROUT ) algo.pffun = HPL_pdpancrT; else algo.pffun = HPL_pdpanrlT; algo.rfact = rpfa = rfaval[irfa]; if( rpfa == HPL_LEFT_LOOKING ) algo.rffun = HPL_pdrpanllT; else if( rpfa == HPL_CROUT ) algo.rffun = HPL_pdrpancrT; else algo.rffun = HPL_pdrpanrlT; if( Unotran != 0 ) algo.upfun = HPL_pdupdateTN; else algo.upfun = HPL_pdupdateTT; } algo.fswap = fswap; algo.fsthr = tswap; algo.equil = equil; algo.align = align; HPL_pdtest( &test, &grid, &algo, nval[in], nbval[inb], &rdataCur ); if (0 == myrow && 0 == mycol) if (rdata->Gflops < rdataCur.Gflops) *rdata = rdataCur; } } } } } } } } (void) HPL_grid_exit( &grid ); label_end_of_npqs: ; } /* * Print ending messages, close output file, exit. */ if( rank == 0 ) { if (test.kfail || test.kskip) *failure = 1; test.ktest = test.kpass + test.kfail + test.kskip; #ifndef HPL_DETAILED_TIMING HPL_fprintf( test.outfp, "%s%s\n", "========================================", "========================================" ); #else if( test.thrsh > HPL_rzero ) HPL_fprintf( test.outfp, "%s%s\n", "========================================", "========================================" ); #endif HPL_fprintf( test.outfp, "\n%s %6d %s\n", "Finished", test.ktest, "tests with the following results:" ); if( test.thrsh > HPL_rzero ) { HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, "tests completed and passed residual checks," ); HPL_fprintf( test.outfp, " %6d %s\n", test.kfail, "tests completed and failed residual checks," ); HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, "tests skipped because of illegal input values." 
); } else { HPL_fprintf( test.outfp, " %6d %s\n", test.kpass, "tests completed without checking," ); HPL_fprintf( test.outfp, " %6d %s\n", test.kskip, "tests skipped because of illegal input values." ); } HPL_fprintf( test.outfp, "%s%s\n", "----------------------------------------", "----------------------------------------" ); HPL_fprintf( test.outfp, "\nEnd of Tests.\n" ); HPL_fprintf( test.outfp, "%s%s\n", "========================================", "========================================" ); if( ( test.outfp != stdout ) && ( test.outfp != stderr ) ) (void) fclose( test.outfp ); } #ifdef HPL_CALL_VSIPL vsip_finalize((void*)0); #endif /* MPI_Finalize(); */ /* exit( 0 ); */ return( 0 ); /* * End of main */ } hpcc-1.4.1/hpl/testing/ptest/HPL_pdinfo.c0000644000000000000000000013145611353467335015072 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" extern int HPCC_Defaults(HPL_T_test *TEST, int *NS, int *N, int *NBS, int *NB, HPL_T_ORDER *PMAPPIN, int *NPQS, int *P, int *Q, int *NPFS, HPL_T_FACT *PF, int *NBMS, int *NBM, int *NDVS, int *NDV, int *NRFS, HPL_T_FACT *RF, int *NTPS, HPL_T_TOP *TP, int *NDHS, int *DH, HPL_T_SWAP *FSWAP, int *TSWAP, int *L1NOTRAN, int *UNOTRAN, int *EQUIL, int *ALIGN, MPI_Comm comm); #ifdef HPL_STDC_HEADERS void HPL_pdinfo ( HPL_T_test * TEST, int * NS, int * N, int * NBS, int * NB, HPL_T_ORDER * PMAPPIN, int * NPQS, int * P, int * Q, int * NPFS, HPL_T_FACT * PF, int * NBMS, int * NBM, int * NDVS, int * NDV, int * NRFS, HPL_T_FACT * RF, int * NTPS, HPL_T_TOP * TP, int * NDHS, int * DH, HPL_T_SWAP * FSWAP, int * TSWAP, int * L1NOTRAN, int * UNOTRAN, int * EQUIL, int * ALIGN ) #else void HPL_pdinfo ( TEST, NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, 
TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN ) HPL_T_test * TEST; int * NS; int * N; int * NBS; int * NB; HPL_T_ORDER * PMAPPIN; int * NPQS; int * P; int * Q; int * NPFS; HPL_T_FACT * PF; int * NBMS; int * NBM; int * NDVS; int * NDV; int * NRFS; HPL_T_FACT * RF; int * NTPS; HPL_T_TOP * TP; int * NDHS; int * DH; HPL_T_SWAP * FSWAP; int * TSWAP; int * L1NOTRAN; int * UNOTRAN; int * EQUIL; int * ALIGN; #endif { /* * Purpose * ======= * * HPL_pdinfo reads the startup information for the various tests and * transmits it to all processes. * * Arguments * ========= * * TEST (global output) HPL_T_test * * On entry, TEST points to a testing data structure. On exit, * the fields of this data structure are initialized as follows: * TEST->outfp specifies the output file where the results will * be printed. It is only defined and used by the process 0 of * the grid. TEST->thrsh specifies the threshhold value for the * test ratio. TEST->epsil is the relative machine precision of * the distributed computer. Finally the test counters, kfail, * kpass, kskip, ktest are initialized to zero. * * NS (global output) int * * On exit, NS specifies the number of different problem sizes * to be tested. NS is less than or equal to HPL_MAX_PARAM. * * N (global output) int * * On entry, N is an array of dimension HPL_MAX_PARAM. On exit, * the first NS entries of this array contain the problem sizes * to run the code with. * * NBS (global output) int * * On exit, NBS specifies the number of different distribution * blocking factors to be tested. NBS must be less than or equal * to HPL_MAX_PARAM. * * NB (global output) int * * On exit, PMAPPIN specifies the process mapping onto the no- * des of the MPI machine configuration. PMAPPIN defaults to * row-major ordering. * * PMAPPIN (global output) HPL_T_ORDER * * On entry, NB is an array of dimension HPL_MAX_PARAM. 
On exit, * the first NBS entries of this array contain the values of the * various distribution blocking factors, to run the code with. * * NPQS (global output) int * * On exit, NPQS specifies the number of different values that * can be used for P and Q, i.e., the number of process grids to * run the code with. NPQS must be less than or equal to * HPL_MAX_PARAM. * * P (global output) int * * On entry, P is an array of dimension HPL_MAX_PARAM. On exit, * the first NPQS entries of this array contain the values of P, * the number of process rows of the NPQS grids to run the code * with. * * Q (global output) int * * On entry, Q is an array of dimension HPL_MAX_PARAM. On exit, * the first NPQS entries of this array contain the values of Q, * the number of process columns of the NPQS grids to run the * code with. * * NPFS (global output) int * * On exit, NPFS specifies the number of different values that * can be used for PF : the panel factorization algorithm to run * the code with. NPFS is less than or equal to HPL_MAX_PARAM. * * PF (global output) HPL_T_FACT * * On entry, PF is an array of dimension HPL_MAX_PARAM. On exit, * the first NPFS entries of this array contain the various * panel factorization algorithms to run the code with. * * NBMS (global output) int * * On exit, NBMS specifies the number of various recursive * stopping criteria to be tested. NBMS must be less than or * equal to HPL_MAX_PARAM. * * NBM (global output) int * * On entry, NBM is an array of dimension HPL_MAX_PARAM. On * exit, the first NBMS entries of this array contain the values * of the various recursive stopping criteria to be tested. * * NDVS (global output) int * * On exit, NDVS specifies the number of various numbers of * panels in recursion to be tested. NDVS is less than or equal * to HPL_MAX_PARAM. * * NDV (global output) int * * On entry, NDV is an array of dimension HPL_MAX_PARAM. 
On * exit, the first NDVS entries of this array contain the values * of the various numbers of panels in recursion to be tested. * * NRFS (global output) int * * On exit, NRFS specifies the number of different values that * can be used for RF : the recursive factorization algorithm to * be tested. NRFS is less than or equal to HPL_MAX_PARAM. * * RF (global output) HPL_T_FACT * * On entry, RF is an array of dimension HPL_MAX_PARAM. On exit, * the first NRFS entries of this array contain the various * recursive factorization algorithms to run the code with. * * NTPS (global output) int * * On exit, NTPS specifies the number of different values that * can be used for the broadcast topologies to be tested. NTPS * is less than or equal to HPL_MAX_PARAM. * * TP (global output) HPL_T_TOP * * On entry, TP is an array of dimension HPL_MAX_PARAM. On exit, * the first NTPS entries of this array contain the various * broadcast (along rows) topologies to run the code with. * * NDHS (global output) int * * On exit, NDHS specifies the number of different values that * can be used for the lookahead depths to be tested. NDHS is * less than or equal to HPL_MAX_PARAM. * * DH (global output) int * * On entry, DH is an array of dimension HPL_MAX_PARAM. On * exit, the first NDHS entries of this array contain the values * of lookahead depths to run the code with. Each value is * either 0 (no lookahead) or greater than zero. * * FSWAP (global output) HPL_T_SWAP * * On exit, FSWAP specifies the swapping algorithm to be used in * all tests. * * TSWAP (global output) int * * On exit, TSWAP specifies the swapping threshold as a number * of columns when the mixed swapping algorithm was chosen. * * L1NOTRAN (global output) int * * On exit, L1NOTRAN specifies whether the upper triangle of the * panels of columns should be stored in no-transposed form * (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
* * UNOTRAN (global output) int * * On exit, UNOTRAN specifies whether the panels of rows should * be stored in no-transposed form (UNOTRAN=1) or transposed * form (UNOTRAN=0) during their broadcast. * * EQUIL (global output) int * * On exit, EQUIL specifies whether equilibration during the * swap-broadcast of the panel of rows should be performed * (EQUIL=1) or not (EQUIL=0). * * ALIGN (global output) int * * On exit, ALIGN specifies the alignment of the dynamically * allocated buffers in double precision words. ALIGN is greater * than zero. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ char file[HPL_LINE_MAX], line[HPL_LINE_MAX], auth[HPL_LINE_MAX], num [HPL_LINE_MAX]; FILE * infp; int * iwork; char * lineptr; int error=0, fid, i, j, lwork, maxp, nprocs, rank, size; /* .. * .. Executable Statements .. */ MPI_Comm_rank( MPI_COMM_WORLD, &rank ); MPI_Comm_size( MPI_COMM_WORLD, &size ); /* * Initialize the TEST data structure with default values */ TEST->outfp = stderr; TEST->epsil = 2.0e-16; TEST->thrsh = 16.0; TEST->kfail = TEST->kpass = TEST->kskip = TEST->ktest = 0; /* * Process 0 reads the input data, broadcasts to other processes and * writes needed information to TEST->outfp. 
*/ if( rank == 0 ) { /* * Open file and skip data file header */ #define INFILE "hpccinf.txt" if( ( infp = fopen( INFILE, "r" ) ) == NULL ) { HPL_pwarn( stderr, __LINE__ + (1 << 30), "HPL_pdinfo", "cannot open file " INFILE ); error = 1; /* goto label_error; */ } if (infp) { (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) fgets( auth, HPL_LINE_MAX - 2, infp ); /* * Read name and unit number for summary output file */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", file ); (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); fid = atoi( num ); } fid = 8; /* always write to a file */ strcpy( file, "hpccoutf.txt" ); if ( fid == 6 ) TEST->outfp = stdout; else if( fid == 7 ) TEST->outfp = stderr; else if( ( TEST->outfp = fopen( file, "a" ) ) == NULL ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "cannot open file %s.", file ); TEST->outfp = stderr; error = 1; goto label_error; } if (error == 1) goto label_error; /* * Read and check the parameter values for the tests. 
* * Problem size (>=0) (N) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NS = atoi( num ); if( ( *NS < 1 ) || ( *NS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %d", "Number of values of N is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( N[ i ] = atoi( num ) ) < 0 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of N less than 0" ); error = 1; goto label_error; } } /* * Block size (>=1) (NB) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NBS = atoi( num ); if( ( *NBS < 1 ) || ( *NBS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of NB is less than 1 or", "greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NBS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( NB[ i ] = atoi( num ) ) < 1 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of NB less than 1" ); error = 1; goto label_error; } } /* * Process grids, mapping, (>=1) (P, Q) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *PMAPPIN = ( atoi( num ) == 1 ? 
HPL_COLUMN_MAJOR : HPL_ROW_MAJOR ); (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NPQS = atoi( num ); if( ( *NPQS < 1 ) || ( *NPQS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of grids is less", "than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NPQS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( P[ i ] = atoi( num ) ) < 1 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of P less than 1" ); error = 1; goto label_error; } } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NPQS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( Q[ i ] = atoi( num ) ) < 1 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of Q less than 1" ); error = 1; goto label_error; } } /* * Check for enough processes in machine configuration */ maxp = 0; for( i = 0; i < *NPQS; i++ ) { nprocs = P[i] * Q[i]; maxp = Mmax( maxp, nprocs ); } if( maxp > size ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Need at least %d processes for these tests", maxp ); error = 1; goto label_error; } /* * Checking threshold value (TEST->thrsh) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); TEST->thrsh = atof( num ); /* * Panel factorization algorithm (PF) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NPFS = atoi( num ); if( ( *NPFS < 1 ) || ( *NPFS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "number of values of PFACT", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NPFS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; j = atoi( num ); if( j == 0 ) PF[ i ] = HPL_LEFT_LOOKING; else if( 
j == 1 ) PF[ i ] = HPL_CROUT; else if( j == 2 ) PF[ i ] = HPL_RIGHT_LOOKING; else PF[ i ] = HPL_RIGHT_LOOKING; } /* * Recursive stopping criterium (>=1) (NBM) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NBMS = atoi( num ); if( ( *NBMS < 1 ) || ( *NBMS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of NBMIN", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NBMS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( NBM[ i ] = atoi( num ) ) < 1 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of NBMIN less than 1" ); error = 1; goto label_error; } } /* * Number of panels in recursion (>=2) (NDV) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NDVS = atoi( num ); if( ( *NDVS < 1 ) || ( *NDVS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of NDIV", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NDVS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( NDV[ i ] = atoi( num ) ) < 2 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of NDIV less than 2" ); error = 1; goto label_error; } } /* * Recursive panel factorization (RF) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NRFS = atoi( num ); if( ( *NRFS < 1 ) || ( *NRFS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of RFACT", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NRFS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; j = atoi( num ); 
if( j == 0 ) RF[ i ] = HPL_LEFT_LOOKING; else if( j == 1 ) RF[ i ] = HPL_CROUT; else if( j == 2 ) RF[ i ] = HPL_RIGHT_LOOKING; else RF[ i ] = HPL_RIGHT_LOOKING; } /* * Broadcast topology (TP) (0=rg, 1=2rg, 2=rgM, 3=2rgM, 4=L) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NTPS = atoi( num ); if( ( *NTPS < 1 ) || ( *NTPS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of BCAST", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NTPS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; j = atoi( num ); if( j == 0 ) TP[ i ] = HPL_1RING; else if( j == 1 ) TP[ i ] = HPL_1RING_M; else if( j == 2 ) TP[ i ] = HPL_2RING; else if( j == 3 ) TP[ i ] = HPL_2RING_M; else if( j == 4 ) TP[ i ] = HPL_BLONG; else if( j == 5 ) TP[ i ] = HPL_BLONG_M; else TP[ i ] = HPL_1RING_M; } /* * Lookahead depth (>=0) (NDH) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *NDHS = atoi( num ); if( ( *NDHS < 1 ) || ( *NDHS > HPL_MAX_PARAM ) ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "%s %s %d", "Number of values of DEPTH", "is less than 1 or greater than", HPL_MAX_PARAM ); error = 1; goto label_error; } (void) fgets( line, HPL_LINE_MAX - 2, infp ); lineptr = line; for( i = 0; i < *NDHS; i++ ) { (void) sscanf( lineptr, "%s", num ); lineptr += strlen( num ) + 1; if( ( DH[ i ] = atoi( num ) ) < 0 ) { HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Value of DEPTH less than 0" ); error = 1; goto label_error; } } /* * Swapping algorithm (0,1 or 2) (FSWAP) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); j = atoi( num ); if( j == 0 ) *FSWAP = HPL_SWAP00; else if( j == 1 ) *FSWAP = HPL_SWAP01; else if( j == 2 ) *FSWAP = HPL_SW_MIX; else *FSWAP = HPL_SWAP01; /* * Swapping threshold (>=0) (TSWAP) */ (void) fgets( line, 
HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *TSWAP = atoi( num ); if( *TSWAP <= 0 ) *TSWAP = 0; /* * L1 in (no-)transposed form (0 or 1) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *L1NOTRAN = atoi( num ); if( ( *L1NOTRAN != 0 ) && ( *L1NOTRAN != 1 ) ) *L1NOTRAN = 0; /* * U in (no-)transposed form (0 or 1) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *UNOTRAN = atoi( num ); if( ( *UNOTRAN != 0 ) && ( *UNOTRAN != 1 ) ) *UNOTRAN = 0; /* * Equilibration (0=no, 1=yes) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *EQUIL = atoi( num ); if( ( *EQUIL != 0 ) && ( *EQUIL != 1 ) ) *EQUIL = 1; /* * Memory alignment in bytes (> 0) (ALIGN) */ (void) fgets( line, HPL_LINE_MAX - 2, infp ); (void) sscanf( line, "%s", num ); *ALIGN = atoi( num ); if( *ALIGN <= 0 ) *ALIGN = 4; /* * Close input file */ label_error: if (infp) fclose( infp ); } else { TEST->outfp = NULL; } /* * Check for error on reading input file */ (void) HPL_all_reduce( (void *)(&error), 1, HPL_INT, HPL_max, MPI_COMM_WORLD ); if( error ) { /* if( rank == 0 ) HPL_pwarn( stderr, __LINE__, "HPL_pdinfo", "Illegal input in file " INFILE ". Exiting ..." ); MPI_Finalize(); #ifdef HPL_CALL_VSIPL (void) vsip_finalize( NULL ); #endif exit( 1 ); */ HPCC_Defaults( TEST, /* use outfp, set threshold */ NS, N, NBS, NB, PMAPPIN, NPQS, P, Q, NPFS, PF, NBMS, NBM, NDVS, NDV, NRFS, RF, NTPS, TP, NDHS, DH, FSWAP, TSWAP, L1NOTRAN, UNOTRAN, EQUIL, ALIGN, MPI_COMM_WORLD ); } /* * Compute and broadcast machine epsilon */ TEST->epsil = HPL_pdlamch( MPI_COMM_WORLD, HPL_MACH_EPS ); /* * Pack information arrays and broadcast */ (void) HPL_broadcast( (void *)(&(TEST->thrsh)), 1, HPL_DOUBLE, 0, MPI_COMM_WORLD ); /* * Broadcast array sizes */ iwork = (int *)malloc( (size_t)(15) * sizeof( int ) ); if( rank == 0 ) { iwork[ 0] = *NS; iwork[ 1] = *NBS; iwork[ 2] = ( *PMAPPIN == HPL_ROW_MAJOR ? 
0 : 1 ); iwork[ 3] = *NPQS; iwork[ 4] = *NPFS; iwork[ 5] = *NBMS; iwork[ 6] = *NDVS; iwork[ 7] = *NRFS; iwork[ 8] = *NTPS; iwork[ 9] = *NDHS; iwork[10] = *TSWAP; iwork[11] = *L1NOTRAN; iwork[12] = *UNOTRAN; iwork[13] = *EQUIL; iwork[14] = *ALIGN; } (void) HPL_broadcast( (void *)iwork, 15, HPL_INT, 0, MPI_COMM_WORLD ); if( rank != 0 ) { *NS = iwork[ 0]; *NBS = iwork[ 1]; *PMAPPIN = ( iwork[ 2] == 0 ? HPL_ROW_MAJOR : HPL_COLUMN_MAJOR ); *NPQS = iwork[ 3]; *NPFS = iwork[ 4]; *NBMS = iwork[ 5]; *NDVS = iwork[ 6]; *NRFS = iwork[ 7]; *NTPS = iwork[ 8]; *NDHS = iwork[ 9]; *TSWAP = iwork[10]; *L1NOTRAN = iwork[11]; *UNOTRAN = iwork[12]; *EQUIL = iwork[13]; *ALIGN = iwork[14]; } if( iwork ) free( iwork ); /* * Pack information arrays and broadcast */ lwork = (*NS) + (*NBS) + 2 * (*NPQS) + (*NPFS) + (*NBMS) + (*NDVS) + (*NRFS) + (*NTPS) + (*NDHS) + 1; iwork = (int *)malloc( (size_t)(lwork) * sizeof( int ) ); if( rank == 0 ) { j = 0; for( i = 0; i < *NS; i++ ) { iwork[j] = N [i]; j++; } for( i = 0; i < *NBS; i++ ) { iwork[j] = NB[i]; j++; } for( i = 0; i < *NPQS; i++ ) { iwork[j] = P [i]; j++; } for( i = 0; i < *NPQS; i++ ) { iwork[j] = Q [i]; j++; } for( i = 0; i < *NPFS; i++ ) { if( PF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; else if( PF[i] == HPL_CROUT ) iwork[j] = 1; else if( PF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; j++; } for( i = 0; i < *NBMS; i++ ) { iwork[j] = NBM[i]; j++; } for( i = 0; i < *NDVS; i++ ) { iwork[j] = NDV[i]; j++; } for( i = 0; i < *NRFS; i++ ) { if( RF[i] == HPL_LEFT_LOOKING ) iwork[j] = 0; else if( RF[i] == HPL_CROUT ) iwork[j] = 1; else if( RF[i] == HPL_RIGHT_LOOKING ) iwork[j] = 2; j++; } for( i = 0; i < *NTPS; i++ ) { if( TP[i] == HPL_1RING ) iwork[j] = 0; else if( TP[i] == HPL_1RING_M ) iwork[j] = 1; else if( TP[i] == HPL_2RING ) iwork[j] = 2; else if( TP[i] == HPL_2RING_M ) iwork[j] = 3; else if( TP[i] == HPL_BLONG ) iwork[j] = 4; else if( TP[i] == HPL_BLONG_M ) iwork[j] = 5; j++; } for( i = 0; i < *NDHS; i++ ) { iwork[j] = DH[i]; j++; } if( 
*FSWAP == HPL_SWAP00 ) iwork[j] = 0; else if( *FSWAP == HPL_SWAP01 ) iwork[j] = 1; else if( *FSWAP == HPL_SW_MIX ) iwork[j] = 2; j++; } (void) HPL_broadcast( (void*)iwork, lwork, HPL_INT, 0, MPI_COMM_WORLD ); if( rank != 0 ) { j = 0; for( i = 0; i < *NS; i++ ) { N [i] = iwork[j]; j++; } for( i = 0; i < *NBS; i++ ) { NB[i] = iwork[j]; j++; } for( i = 0; i < *NPQS; i++ ) { P [i] = iwork[j]; j++; } for( i = 0; i < *NPQS; i++ ) { Q [i] = iwork[j]; j++; } for( i = 0; i < *NPFS; i++ ) { if( iwork[j] == 0 ) PF[i] = HPL_LEFT_LOOKING; else if( iwork[j] == 1 ) PF[i] = HPL_CROUT; else if( iwork[j] == 2 ) PF[i] = HPL_RIGHT_LOOKING; j++; } for( i = 0; i < *NBMS; i++ ) { NBM[i] = iwork[j]; j++; } for( i = 0; i < *NDVS; i++ ) { NDV[i] = iwork[j]; j++; } for( i = 0; i < *NRFS; i++ ) { if( iwork[j] == 0 ) RF[i] = HPL_LEFT_LOOKING; else if( iwork[j] == 1 ) RF[i] = HPL_CROUT; else if( iwork[j] == 2 ) RF[i] = HPL_RIGHT_LOOKING; j++; } for( i = 0; i < *NTPS; i++ ) { if( iwork[j] == 0 ) TP[i] = HPL_1RING; else if( iwork[j] == 1 ) TP[i] = HPL_1RING_M; else if( iwork[j] == 2 ) TP[i] = HPL_2RING; else if( iwork[j] == 3 ) TP[i] = HPL_2RING_M; else if( iwork[j] == 4 ) TP[i] = HPL_BLONG; else if( iwork[j] == 5 ) TP[i] = HPL_BLONG_M; j++; } for( i = 0; i < *NDHS; i++ ) { DH[i] = iwork[j]; j++; } if( iwork[j] == 0 ) *FSWAP = HPL_SWAP00; else if( iwork[j] == 1 ) *FSWAP = HPL_SWAP01; else if( iwork[j] == 2 ) *FSWAP = HPL_SW_MIX; j++; } if( iwork ) free( iwork ); /* * regurgitate input */ if( rank == 0 ) { HPL_fprintf( TEST->outfp, "%s%s\n", "========================================", "========================================" ); HPL_fprintf( TEST->outfp, "%s%s\n", "HPLinpack 2.0 -- High-Performance Linpack benchmark -- ", " September 10, 2008" ); HPL_fprintf( TEST->outfp, "%s%s\n", "Written by A. Petitet and R. 
Clint Whaley, ", "Innovative Computing Laboratory, UTK" ); HPL_fprintf( TEST->outfp, "%s%s\n", "Modified by Piotr Luszczek, ", "Innovative Computing Laboratory, UTK" ); HPL_fprintf( TEST->outfp, "%s%s\n", "Modified by Julien Langou, ", "University of Colorado Denver"); HPL_fprintf( TEST->outfp, "%s%s\n", "========================================", "========================================" ); HPL_fprintf( TEST->outfp, "\n%s\n", "An explanation of the input/output parameters follows:" ); HPL_fprintf( TEST->outfp, "%s\n", "T/V : Wall time / encoded variant." ); HPL_fprintf( TEST->outfp, "%s\n", "N : The order of the coefficient matrix A." ); HPL_fprintf( TEST->outfp, "%s\n", "NB : The partitioning blocking factor." ); HPL_fprintf( TEST->outfp, "%s\n", "P : The number of process rows." ); HPL_fprintf( TEST->outfp, "%s\n", "Q : The number of process columns." ); HPL_fprintf( TEST->outfp, "%s\n", "Time : Time in seconds to solve the linear system." ); HPL_fprintf( TEST->outfp, "%s\n\n", "Gflops : Rate of execution for solving the linear system." 
); HPL_fprintf( TEST->outfp, "%s\n", "The following parameter values will be used:" ); /* * Problem size */ HPL_fprintf( TEST->outfp, "\nN :" ); for( i = 0; i < Mmin( 8, *NS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", N[i] ); if( *NS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", N[i] ); if( *NS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", N[i] ); } } /* * Distribution blocking factor */ HPL_fprintf( TEST->outfp, "\nNB :" ); for( i = 0; i < Mmin( 8, *NBS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); if( *NBS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NBS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); if( *NBS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NBS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", NB[i] ); } } /* * Process mapping */ HPL_fprintf( TEST->outfp, "\nPMAP :" ); if( *PMAPPIN == HPL_ROW_MAJOR ) HPL_fprintf( TEST->outfp, " Row-major process mapping" ); else if( *PMAPPIN == HPL_COLUMN_MAJOR ) HPL_fprintf( TEST->outfp, " Column-major process mapping" ); /* * Process grid */ HPL_fprintf( TEST->outfp, "\nP :" ); for( i = 0; i < Mmin( 8, *NPQS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", P[i] ); if( *NPQS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NPQS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", P[i] ); if( *NPQS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NPQS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", P[i] ); } } HPL_fprintf( TEST->outfp, "\nQ :" ); for( i = 0; i < Mmin( 8, *NPQS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); if( *NPQS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NPQS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); if( *NPQS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NPQS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", Q[i] ); } } /* * Panel Factorization */ HPL_fprintf( 
TEST->outfp, "\nPFACT :" ); for( i = 0; i < Mmin( 8, *NPFS ); i++ ) { if( PF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( PF[i] == HPL_CROUT ) HPL_fprintf( TEST->outfp, " Crout " ); else if( PF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } if( *NPFS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NPFS ); i++ ) { if( PF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( PF[i] == HPL_CROUT ) HPL_fprintf( TEST->outfp, " Crout " ); else if( PF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } if( *NPFS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NPFS; i++ ) { if( PF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( PF[i] == HPL_CROUT ) HPL_fprintf( TEST->outfp, " Crout " ); else if( PF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } } } /* * Recursive stopping criterium */ HPL_fprintf( TEST->outfp, "\nNBMIN :" ); for( i = 0; i < Mmin( 8, *NBMS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); if( *NBMS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NBMS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); if( *NBMS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NBMS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", NBM[i] ); } } /* * Number of panels in recursion */ HPL_fprintf( TEST->outfp, "\nNDIV :" ); for( i = 0; i < Mmin( 8, *NDVS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); if( *NDVS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NDVS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); if( *NDVS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NDVS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", NDV[i] ); } } /* * Recursive Factorization */ HPL_fprintf( TEST->outfp, "\nRFACT :" ); for( i = 0; i < Mmin( 8, *NRFS ); i++ ) { if( RF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( RF[i] == HPL_CROUT ) 
HPL_fprintf( TEST->outfp, " Crout " ); else if( RF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } if( *NRFS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NRFS ); i++ ) { if( RF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( RF[i] == HPL_CROUT ) HPL_fprintf( TEST->outfp, " Crout " ); else if( RF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } if( *NRFS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NRFS; i++ ) { if( RF[i] == HPL_LEFT_LOOKING ) HPL_fprintf( TEST->outfp, " Left " ); else if( RF[i] == HPL_CROUT ) HPL_fprintf( TEST->outfp, " Crout " ); else if( RF[i] == HPL_RIGHT_LOOKING ) HPL_fprintf( TEST->outfp, " Right " ); } } } /* * Broadcast topology */ HPL_fprintf( TEST->outfp, "\nBCAST :" ); for( i = 0; i < Mmin( 8, *NTPS ); i++ ) { if( TP[i] == HPL_1RING ) HPL_fprintf( TEST->outfp, " 1ring " ); else if( TP[i] == HPL_1RING_M ) HPL_fprintf( TEST->outfp, " 1ringM " ); else if( TP[i] == HPL_2RING ) HPL_fprintf( TEST->outfp, " 2ring " ); else if( TP[i] == HPL_2RING_M ) HPL_fprintf( TEST->outfp, " 2ringM " ); else if( TP[i] == HPL_BLONG ) HPL_fprintf( TEST->outfp, " Blong " ); else if( TP[i] == HPL_BLONG_M ) HPL_fprintf( TEST->outfp, " BlongM " ); } if( *NTPS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NTPS ); i++ ) { if( TP[i] == HPL_1RING ) HPL_fprintf( TEST->outfp, " 1ring " ); else if( TP[i] == HPL_1RING_M ) HPL_fprintf( TEST->outfp, " 1ringM " ); else if( TP[i] == HPL_2RING ) HPL_fprintf( TEST->outfp, " 2ring " ); else if( TP[i] == HPL_2RING_M ) HPL_fprintf( TEST->outfp, " 2ringM " ); else if( TP[i] == HPL_BLONG ) HPL_fprintf( TEST->outfp, " Blong " ); else if( TP[i] == HPL_BLONG_M ) HPL_fprintf( TEST->outfp, " BlongM " ); } if( *NTPS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NTPS; i++ ) { if( TP[i] == HPL_1RING ) HPL_fprintf( TEST->outfp, " 1ring " ); else if( TP[i] == HPL_1RING_M ) HPL_fprintf( TEST->outfp, " 
1ringM " ); else if( TP[i] == HPL_2RING ) HPL_fprintf( TEST->outfp, " 2ring " ); else if( TP[i] == HPL_2RING_M ) HPL_fprintf( TEST->outfp, " 2ringM " ); else if( TP[i] == HPL_BLONG ) HPL_fprintf( TEST->outfp, " Blong " ); else if( TP[i] == HPL_BLONG_M ) HPL_fprintf( TEST->outfp, " BlongM " ); } } } /* * Lookahead depths */ HPL_fprintf( TEST->outfp, "\nDEPTH :" ); for( i = 0; i < Mmin( 8, *NDHS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); if( *NDHS > 8 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 8; i < Mmin( 16, *NDHS ); i++ ) HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); if( *NDHS > 16 ) { HPL_fprintf( TEST->outfp, "\n " ); for( i = 16; i < *NDHS; i++ ) HPL_fprintf( TEST->outfp, "%8d ", DH[i] ); } } /* * Swapping algorithm */ HPL_fprintf( TEST->outfp, "\nSWAP :" ); if( *FSWAP == HPL_SWAP00 ) HPL_fprintf( TEST->outfp, " Binary-exchange" ); else if( *FSWAP == HPL_SWAP01 ) HPL_fprintf( TEST->outfp, " Spread-roll (long)" ); else if( *FSWAP == HPL_SW_MIX ) HPL_fprintf( TEST->outfp, " Mix (threshold = %d)", *TSWAP ); /* * L1 storage form */ HPL_fprintf( TEST->outfp, "\nL1 :" ); if( *L1NOTRAN != 0 ) HPL_fprintf( TEST->outfp, " no-transposed form" ); else HPL_fprintf( TEST->outfp, " transposed form" ); /* * U storage form */ HPL_fprintf( TEST->outfp, "\nU :" ); if( *UNOTRAN != 0 ) HPL_fprintf( TEST->outfp, " no-transposed form" ); else HPL_fprintf( TEST->outfp, " transposed form" ); /* * Equilibration */ HPL_fprintf( TEST->outfp, "\nEQUIL :" ); if( *EQUIL != 0 ) HPL_fprintf( TEST->outfp, " yes" ); else HPL_fprintf( TEST->outfp, " no" ); /* * Alignment */ HPL_fprintf( TEST->outfp, "\nALIGN : %d double precision words", *ALIGN ); HPL_fprintf( TEST->outfp, "\n\n" ); /* * For testing only */ if( TEST->thrsh > HPL_rzero ) { HPL_fprintf( TEST->outfp, "%s%s\n\n", "----------------------------------------", "----------------------------------------" ); HPL_fprintf( TEST->outfp, "%s\n", "- The matrix A is randomly generated for each test." 
); HPL_fprintf( TEST->outfp, "%s\n", "- The following scaled residual check will be computed:" ); HPL_fprintf( TEST->outfp, "%s\n", " ||Ax-b||_oo / ( eps * ( || x ||_oo * || A ||_oo + || b ||_oo ) * N )" ); HPL_fprintf( TEST->outfp, "%s %21.6e\n", "- The relative machine precision (eps) is taken to be ", TEST->epsil ); HPL_fprintf( TEST->outfp, "%s %11.1f\n\n", "- Computational tests pass if scaled residuals are less than ", TEST->thrsh ); } } /* * End of HPL_pdinfo */ } hpcc-1.4.1/hpl/testing/ptest/HPL_pdtest.c0000644000000000000000000004361311256503657015113 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" #include #ifdef HPL_STDC_HEADERS void HPL_pdtest ( HPL_T_test * TEST, HPL_T_grid * GRID, HPL_T_palg * ALGO, const int N, const int NB, HPL_RuntimeData * rdata ) #else void HPL_pdtest ( TEST, GRID, ALGO, N, NB, rdata ) HPL_T_test * TEST; HPL_T_grid * GRID; HPL_T_palg * ALGO; const int N; const int NB; HPL_RuntimeData * rdata; #endif { /* * Purpose * ======= * * HPL_pdtest performs one test given a set of parameters such as the * process grid, the problem size, the distribution blocking factor ... * This function generates the data, calls and times the linear system * solver, checks the accuracy of the obtained vector solution and * writes this information to the file pointed to by TEST->outfp. * * Arguments * ========= * * TEST (global input) HPL_T_test * * On entry, TEST points to a testing data structure: outfp * specifies the output file where the results will be printed. * It is only defined and used by the process 0 of the grid. * thrsh specifies the threshhold value for the test ratio. 
* Concretely, a test is declared "PASSED" if and only if the * following inequality is satisfied: * ||Ax-b||_oo / ( epsil * * ( || x ||_oo * || A ||_oo + || b ||_oo ) * * N ) < thrsh. * epsil is the relative machine precision of the distributed * computer. Finally the test counters, kfail, kpass, kskip and * ktest are updated as follows: if the test passes, kpass is * incremented by one; if the test fails, kfail is incremented * by one; if the test is skipped, kskip is incremented by one. * ktest is left unchanged. * * GRID (local input) HPL_T_grid * * On entry, GRID points to the data structure containing the * process grid information. * * ALGO (global input) HPL_T_palg * * On entry, ALGO points to the data structure containing the * algorithmic parameters to be used for this test. * * N (global input) const int * On entry, N specifies the order of the coefficient matrix A. * N must be at least zero. * * NB (global input) const int * On entry, NB specifies the blocking factor used to partition * and distribute the matrix A. NB must be larger than one. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ #ifdef HPL_DETAILED_TIMING double HPL_w[HPL_TIMING_N]; #endif HPL_T_pmat mat; double wtime[1]; int info[3]; double Anorm1, AnormI, Gflops, Xnorm1, XnormI, BnormI, resid0, resid1; double * Bptr; void * vptr = NULL; static int first=1; int ii, ip2, mycol, myrow, npcol, nprow, nq; char ctop, cpfact, crfact; /* .. * .. Executable Statements .. */ (void) HPL_grid_info( GRID, &nprow, &npcol, &myrow, &mycol ); mat.n = N; mat.nb = NB; mat.info = 0; mat.mp = HPL_numroc( N, NB, NB, myrow, 0, nprow ); nq = HPL_numroc( N, NB, NB, mycol, 0, npcol ); mat.nq = nq + 1; /* * Allocate matrix, right-hand-side, and vector solution x. [ A | b ] is * N by N+1. One column is added in every process column for the solve. * The result however is stored in a 1 x N vector replicated in every * process row. 
In every process, A is lda * (nq+1), x is 1 * nq and the * workspace is mp. * * Ensure that lda is a multiple of ALIGN and not a power of 2 */ mat.ld = ( ( Mmax( 1, mat.mp ) - 1 ) / ALGO->align ) * ALGO->align; do { ii = ( mat.ld += ALGO->align ); ip2 = 1; while( ii > 1 ) { ii >>= 1; ip2 <<= 1; } } while( mat.ld == ip2 ); /* * Allocate dynamic memory */ vptr = (void*)HPCC_malloc( ( (size_t)(ALGO->align) + (size_t)(mat.ld+1) * (size_t)(mat.nq) ) * sizeof(double) ); info[0] = (vptr == NULL); info[1] = myrow; info[2] = mycol; (void) HPL_all_reduce( (void *)(info), 3, HPL_INT, HPL_max, GRID->all_comm ); if( info[0] != 0 ) { if( ( myrow == 0 ) && ( mycol == 0 ) ) HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "[%d,%d] %s", info[1], info[2], "Memory allocation failed for A, x and b. Skip." ); (TEST->kskip)++; return; } /* * generate matrix and right-hand-side, [ A | b ] which is N by N+1. */ mat.A = (double *)HPL_PTR( vptr, ((size_t)(ALGO->align) * sizeof(double) ) ); mat.X = Mptr( mat.A, 0, mat.nq, mat.ld ); HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); #ifdef HPL_CALL_VSIPL mat.block = vsip_blockbind_d( (vsip_scalar_d *)(mat.A), (vsip_length)(mat.ld * mat.nq), VSIP_MEM_NONE ); #endif /* * Solve linear system */ HPL_ptimer_boot(); (void) HPL_barrier( GRID->all_comm ); HPL_ptimer( 0 ); HPL_pdgesv( GRID, ALGO, &mat ); HPL_ptimer( 0 ); #ifdef HPL_CALL_VSIPL (void) vsip_blockrelease_d( mat.block, VSIP_TRUE ); vsip_blockdestroy_d( mat.block ); #endif /* * Gather max of all CPU and WALL clock timings and print timing results */ HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, 1, 0, wtime ); if( ( myrow == 0 ) && ( mycol == 0 ) ) { if( first ) { HPL_fprintf( TEST->outfp, "%s%s\n", "========================================", "========================================" ); HPL_fprintf( TEST->outfp, "%s%s\n", "T/V N NB P Q", " Time Gflops" ); HPL_fprintf( TEST->outfp, "%s%s\n", "----------------------------------------", 
"----------------------------------------" ); if( TEST->thrsh <= HPL_rzero ) first = 0; } /* * 2/3 N^3 - 1/2 N^2 flops for LU factorization + 2 N^2 flops for solve. * Print WALL time */ rdata->Gflops = Gflops = ( ( (double)(N) / 1.0e+9 ) * ( (double)(N) / wtime[0] ) ) * ( ( 2.0 / 3.0 ) * (double)(N) + ( 3.0 / 2.0 ) ); rdata->cpfact = cpfact = ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : ( ( (HPL_T_FACT)(ALGO->pfact) == (HPL_T_FACT)(HPL_CROUT) ) ? (char)('C') : (char)('R') ) ); rdata->crfact = crfact = ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_LEFT_LOOKING) ) ? (char)('L') : ( ( (HPL_T_FACT)(ALGO->rfact) == (HPL_T_FACT)(HPL_CROUT) ) ? (char)('C') : (char)('R') ) ); if( ALGO->btopo == HPL_1RING ) ctop = '0'; else if( ALGO->btopo == HPL_1RING_M ) ctop = '1'; else if( ALGO->btopo == HPL_2RING ) ctop = '2'; else if( ALGO->btopo == HPL_2RING_M ) ctop = '3'; else if( ALGO->btopo == HPL_BLONG ) ctop = '4'; else /* if( ALGO->btopo == HPL_BLONG_M ) */ ctop = '5'; rdata->ctop = ctop; rdata->eps = TEST->epsil; rdata->order = ( GRID->order == HPL_ROW_MAJOR ? 'R' : 'C' ); rdata->depth = ALGO->depth; rdata->nbdiv = ALGO->nbdiv; rdata->nbmin = ALGO->nbmin; rdata->time = wtime[0]; rdata->N = N; rdata->NB = NB; rdata->nprow = nprow; rdata->npcol = npcol; if( wtime[0] > HPL_rzero ) HPL_fprintf( TEST->outfp, "W%c%1d%c%c%1d%c%1d%12d %5d %5d %5d %18.2f %18.3e\n", ( GRID->order == HPL_ROW_MAJOR ? 
'R' : 'C' ), ALGO->depth, ctop, crfact, ALGO->nbdiv, cpfact, ALGO->nbmin, N, NB, nprow, npcol, wtime[0], Gflops ); } #ifdef HPL_DETAILED_TIMING HPL_ptimer_combine( GRID->all_comm, HPL_AMAX_PTIME, HPL_WALL_PTIME, HPL_TIMING_N, HPL_TIMING_BEG, HPL_w ); if( ( myrow == 0 ) && ( mycol == 0 ) ) { HPL_fprintf( TEST->outfp, "%s%s\n", "--VVV--VVV--VVV--VVV--VVV--VVV--VVV--V", "VV--VVV--VVV--VVV--VVV--VVV--VVV--VVV-" ); /* * Recursive panel factorization */ if( HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "Max aggregated wall time rfact . . . : %18.2f\n", HPL_w[HPL_TIMING_RPFACT-HPL_TIMING_BEG] ); /* * Panel factorization */ if( HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "+ Max aggregated wall time pfact . . : %18.2f\n", HPL_w[HPL_TIMING_PFACT-HPL_TIMING_BEG] ); /* * Panel factorization (swap) */ if( HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "+ Max aggregated wall time mxswp . . : %18.2f\n", HPL_w[HPL_TIMING_MXSWP-HPL_TIMING_BEG] ); /* * Update */ if( HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "Max aggregated wall time update . . : %18.2f\n", HPL_w[HPL_TIMING_UPDATE-HPL_TIMING_BEG] ); /* * Update (swap) */ if( HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "+ Max aggregated wall time laswp . . : %18.2f\n", HPL_w[HPL_TIMING_LASWP-HPL_TIMING_BEG] ); /* * Upper triangular system solve */ if( HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] > HPL_rzero ) HPL_fprintf( TEST->outfp, "Max aggregated wall time up tr sv . 
: %18.2f\n", HPL_w[HPL_TIMING_PTRSV-HPL_TIMING_BEG] ); if( TEST->thrsh <= HPL_rzero ) HPL_fprintf( TEST->outfp, "%s%s\n", "========================================", "========================================" ); } #endif /* * Quick return, if I am not interested in checking the computations */ if( TEST->thrsh <= HPL_rzero ) { (TEST->kpass)++; if( vptr ) HPCC_free( vptr ); return; } /* * Check info returned by solve */ if( mat.info != 0 ) { if( ( myrow == 0 ) && ( mycol == 0 ) ) HPL_pwarn( TEST->outfp, __LINE__, "HPL_pdtest", "%s %d, %s", "Error code returned by solve is", mat.info, "skip" ); (TEST->kskip)++; if( vptr ) HPCC_free( vptr ); return; } /* * Check computation, re-generate [ A | b ], compute norm 1 and inf of A and x, * and norm inf of b - A x. Display residual checks. */ HPL_pdmatgen( GRID, N, N+1, NB, mat.A, mat.ld, HPL_ISEED ); rdata->Anorm1 = Anorm1 = HPL_pdlange( GRID, HPL_NORM_1, N, N, NB, mat.A, mat.ld ); rdata->AnormI = AnormI = HPL_pdlange( GRID, HPL_NORM_I, N, N, NB, mat.A, mat.ld ); /* * Because x is distributed in process rows, switch the norms */ rdata->XnormI = XnormI = HPL_pdlange( GRID, HPL_NORM_1, 1, N, NB, mat.X, 1 ); rdata->Xnorm1 = Xnorm1 = HPL_pdlange( GRID, HPL_NORM_I, 1, N, NB, mat.X, 1 ); /* * If I am in the col that owns b, (1) compute local BnormI, (2) all_reduce to * find the max (in the col). Then (3) broadcast along the rows so that every * process has BnormI. Note that since we use a uniform distribution in [-0.5,0.5] * for the entries of B, it is very likely that BnormI (<=,~) 0.5. 
*/ Bptr = Mptr( mat.A, 0, nq, mat.ld ); if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ){ if( mat.mp > 0 ) { BnormI = Bptr[HPL_idamax( mat.mp, Bptr, 1 )]; BnormI = Mabs( BnormI ); } else { BnormI = HPL_rzero; } (void) HPL_all_reduce( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_max, GRID->col_comm ); } (void) HPL_broadcast( (void *)(&BnormI), 1, HPL_DOUBLE, HPL_indxg2p( N, NB, NB, 0, npcol ), GRID->row_comm ); rdata->BnormI = BnormI; /* * If I own b, compute ( b - A x ) and ( - A x ) otherwise */ if( mycol == HPL_indxg2p( N, NB, NB, 0, npcol ) ) { HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, mat.A, mat.ld, mat.X, 1, HPL_rone, Bptr, 1 ); } else if( nq > 0 ) { HPL_dgemv( HplColumnMajor, HplNoTrans, mat.mp, nq, -HPL_rone, mat.A, mat.ld, mat.X, 1, HPL_rzero, Bptr, 1 ); } else { for( ii = 0; ii < mat.mp; ii++ ) Bptr[ii] = HPL_rzero; } /* * Reduce the distributed residual in process column 0 */ if( mat.mp > 0 ) (void) HPL_reduce( Bptr, mat.mp, HPL_DOUBLE, HPL_sum, 0, GRID->row_comm ); /* * Compute || b - A x ||_oo */ rdata->RnormI = resid0 = HPL_pdlange( GRID, HPL_NORM_I, N, 1, NB, Bptr, mat.ld ); /* * Computes and displays norms, residuals ... */ if( N <= 0 ) { resid1 = HPL_rzero; } else { resid1 = resid0 / ( TEST->epsil * ( AnormI * XnormI + BnormI ) * (double)(N) ); } if( resid1 < TEST->thrsh ) (TEST->kpass)++; else (TEST->kfail)++; if( ( myrow == 0 ) && ( mycol == 0 ) ) { HPL_fprintf( TEST->outfp, "%s%s\n", "----------------------------------------", "----------------------------------------" ); HPL_fprintf( TEST->outfp, "%s%16.7f%s%s\n", "||Ax-b||_oo/(eps*(||A||_oo*||x||_oo+||b||_oo)*N)= ", resid1, " ...... ", ( resid1 < TEST->thrsh ? "PASSED" : "FAILED" ) ); if( resid1 >= TEST->thrsh ) { HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||Ax-b||_oo . . . . . . . . . . . . . . . . . = ", resid0 ); HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||A||_oo . . . . . . . . . . . . . . . . . . . = ", AnormI ); HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||A||_1 . . . . . . . . . 
. . . . . . . . . . = ", Anorm1 ); HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||x||_oo . . . . . . . . . . . . . . . . . . . = ", XnormI ); HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||x||_1 . . . . . . . . . . . . . . . . . . . = ", Xnorm1 ); HPL_fprintf( TEST->outfp, "%s%18.6f\n", "||b||_oo . . . . . . . . . . . . . . . . . . . = ", BnormI ); } } if( vptr ) HPCC_free( vptr ); /* * End of HPL_pdtest */ } hpcc-1.4.1/hpl/testing/ptimer/HPL_ptimer.c0000644000000000000000000002703311256503657015247 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * --------------------------------------------------------------------- * Static variables * --------------------------------------------------------------------- */ static int HPL_ptimer_disabled; static double HPL_ptimer_cpusec [HPL_NPTIMER], HPL_ptimer_cpustart [HPL_NPTIMER], HPL_ptimer_wallsec [HPL_NPTIMER], HPL_ptimer_wallstart[HPL_NPTIMER]; /* * --------------------------------------------------------------------- * User callable functions * --------------------------------------------------------------------- */ #ifdef HPL_STDC_HEADERS void HPL_ptimer_boot( void ) #else void HPL_ptimer_boot() #endif { /* * HPL_ptimer_boot (re)sets all timers to 0, and enables HPL_ptimer. */ /* * .. Local Variables .. */ int i; /* .. * .. Executable Statements .. 
*/ HPL_ptimer_disabled = 0; for( i = 0; i < HPL_NPTIMER; i++ ) { HPL_ptimer_cpusec [i] = HPL_ptimer_wallsec [i] = HPL_rzero; HPL_ptimer_cpustart[i] = HPL_ptimer_wallstart[i] = HPL_PTIMER_STARTFLAG; } /* * End of HPL_ptimer_boot */ } #ifdef HPL_STDC_HEADERS void HPL_ptimer( const int I ) #else void HPL_ptimer( I ) const int I; #endif { /* * Purpose * ======= * * HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in * seconds. Up to 64 separate timers can be functioning at once. The * first call starts the timer, and the second stops it. This routine * can be disenabled by calling HPL_ptimer_disable(), so that calls to * the timer are ignored. This feature can be used to make sure certain * sections of code do not affect timings, even if they call routines * which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable * the timer functionality. One can retrieve the current value of a * timer by calling * * t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) * * where I is the timer index in [0..64). To inititialize the timer * functionality, one must have called HPL_ptimer_boot() prior to any of * the functions mentioned above. * * Arguments * ========= * * I (global input) const int * On entry, I specifies the timer to stop/start. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. */ if( HPL_ptimer_disabled ) return; /* * If timer has not been started, start it. 
Otherwise, stop it and add * interval to count */ if( HPL_ptimer_wallstart[I] == HPL_PTIMER_STARTFLAG ) { HPL_ptimer_wallstart[I] = HPL_ptimer_walltime(); HPL_ptimer_cpustart [I] = HPL_ptimer_cputime (); } else { HPL_ptimer_cpusec [I] += HPL_ptimer_cputime ()-HPL_ptimer_cpustart [I]; HPL_ptimer_wallsec [I] += HPL_ptimer_walltime()-HPL_ptimer_wallstart[I]; HPL_ptimer_wallstart[I] = HPL_PTIMER_STARTFLAG; } /* * End of HPL_ptimer */ } #ifdef HPL_STDC_HEADERS void HPL_ptimer_enable( void ) #else void HPL_ptimer_enable() #endif { /* * HPL_ptimer_enable sets it so calls to HPL_ptimer are not ignored. */ /* .. * .. Executable Statements .. */ HPL_ptimer_disabled = 0; return; /* * End of HPL_ptimer_enable */ } #ifdef HPL_STDC_HEADERS void HPL_ptimer_disable( void ) #else void HPL_ptimer_disable() #endif { /* * HPL_ptimer_disable sets it so calls to HPL_ptimer are ignored. */ /* .. * .. Executable Statements .. */ HPL_ptimer_disabled = 1; return; /* * End of HPL_ptimer_disable */ } #ifdef HPL_STDC_HEADERS double HPL_ptimer_inquire ( const HPL_T_PTIME TMTYPE, const int I ) #else double HPL_ptimer_inquire( TMTYPE, I ) const int I; const HPL_T_PTIME TMTYPE; #endif { /* * Purpose * ======= * * HPL_ptimer_inquire returns wall- or cpu- time that has accumulated in * timer I. * * Arguments * ========= * * TMTYPE (global input) const HPL_T_PTIME * On entry, TMTYPE specifies what time will be returned as fol- * lows * = HPL_WALL_PTIME : wall clock time is returned, * = HPL_CPU_PTIME : CPU time is returned (default). * * I (global input) const int * On entry, I specifies the timer to return. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double time; /* .. * .. Executable Statements .. 
*/ /* * If wall- or cpu-time are not available on this machine, return * HPL_PTIMER_ERROR */ if( TMTYPE == HPL_WALL_PTIME ) { if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) time = HPL_PTIMER_ERROR; else time = HPL_ptimer_wallsec[I]; } else { if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) time = HPL_PTIMER_ERROR; else time = HPL_ptimer_cpusec [I]; } return( time ); /* * End of HPL_ptimer_inquire */ } #ifdef HPL_STDC_HEADERS void HPL_ptimer_combine ( MPI_Comm COMM, const HPL_T_PTIME_OP OPE, const HPL_T_PTIME TMTYPE, const int N, const int IBEG, double * TIMES ) #else void HPL_ptimer_combine( COMM, OPE, TMTYPE, N, IBEG, TIMES ) const int IBEG, N; const HPL_T_PTIME_OP OPE; const HPL_T_PTIME TMTYPE; MPI_Comm COMM; double * TIMES; #endif { /* * Purpose * ======= * * HPL_ptimer_combine combines the timing information stored on a scope * of processes into the user TIMES array. * * Arguments * ========= * * COMM (global/local input) MPI_Comm * The MPI communicator identifying the process collection on * which the timings are taken. * * OPE (global input) const HPL_T_PTIME_OP * On entry, OP specifies what combine operation should be done * as follows: * = HPL_AMAX_PTIME get max. time on any process (default), * = HPL_AMIN_PTIME get min. time on any process, * = HPL_SUM_PTIME get sum of times across processes. * * TMTYPE (global input) const HPL_T_PTIME * On entry, TMTYPE specifies what time will be returned as fol- * lows * = HPL_WALL_PTIME : wall clock time is returned, * = HPL_CPU_PTIME : CPU time is returned (default). * * N (global input) const int * On entry, N specifies the number of timers to combine. * * IBEG (global input) const int * On entry, IBEG specifies the first timer to be combined. * * TIMES (global output) double * * On entry, TIMES is an array of dimension at least N. On exit, * this array contains the requested timing information. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. 
*/ int i, tmpdis; /* .. * .. Executable Statements .. */ tmpdis = HPL_ptimer_disabled; HPL_ptimer_disabled = 1; /* * Timer has been disabled for combine operation - copy timing informa- * tion into user times array. If wall- or cpu-time are not available * on this machine, fill in times with HPL_PTIMER_ERROR flag and return. */ if( TMTYPE == HPL_WALL_PTIME ) { if( HPL_ptimer_walltime() == HPL_PTIMER_ERROR ) { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } else { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_wallsec[IBEG+i]; } } else { if( HPL_ptimer_cputime() == HPL_PTIMER_ERROR ) { for( i = 0; i < N; i++ ) TIMES[i] = HPL_PTIMER_ERROR; return; } else { for( i = 0; i < N; i++ ) TIMES[i] = HPL_ptimer_cpusec[IBEG+i]; } } /* * Combine all nodes information, restore HPL_ptimer_disabled, and return */ for( i = 0; i < N; i++ ) TIMES[i] = Mmax( HPL_rzero, TIMES[i] ); if( OPE == HPL_AMAX_PTIME ) (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); else if( OPE == HPL_AMIN_PTIME ) (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_min, COMM ); else if( OPE == HPL_SUM_PTIME ) (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_sum, COMM ); else (void) HPL_all_reduce( (void *)(TIMES), N, HPL_DOUBLE, HPL_max, COMM ); HPL_ptimer_disabled = tmpdis; /* * End of HPL_ptimer_combine */ } hpcc-1.4.1/hpl/testing/ptimer/HPL_ptimer_cputime.c0000644000000000000000000001334111256503657016772 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Purpose * ======= * * HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, * the clock() function is used to return an approximation of processor * time used by the program. 
The value returned is the CPU time used so * far as a clock_t; to get the number of seconds used, the result is * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C * standard library. If HPL_USE_TIMES is defined, the times() function * is used instead. This function returns the current process times. * times() returns the number of clock ticks that have elapsed since the * system has been up. Otherwise and by default, the standard library * function getrusage() is used. * * --------------------------------------------------------------------- */ #if defined( HPL_USE_CLOCK ) #include #ifdef HPL_STDC_HEADERS double HPL_ptimer_cputime( void ) #else double HPL_ptimer_cputime() #endif { static double cps = CLOCKS_PER_SEC; double d; clock_t t1; static clock_t t0 = 0; if( t0 == 0 ) t0 = clock(); t1 = clock() - t0; d = (double)(t1) / cps; return( d ); } #elif defined( HPL_USE_TIMES ) #include #include #ifdef HPL_STDC_HEADERS double HPL_ptimer_cputime( void ) #else double HPL_ptimer_cputime() #endif { clock_t t1; struct tms ts; static double ClockTick = HPL_rzero; if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); (void) times( &ts ); return( (double)(ts.tms_utime) / ClockTick ); } #elif defined( HPL_USE_GETPROCESSTIMES ) #include #include #ifdef HPL_STDC_HEADERS double HPL_ptimer_cputime( void ) #else double HPL_ptimer_cputime() #endif { FILETIME creation, exit, kernel, user; GetProcessTimes( GetCurrentProcess(), &creation, &exit, &kernel, &user ); return (*(LONGLONG*)&kernel+*(LONGLONG*)&user)*1e-7; } /* #elif defined( HPL_USE_GETRUSAGE ) */ #else #include #include #ifdef HPL_STDC_HEADERS double HPL_ptimer_cputime( void ) #else double HPL_ptimer_cputime() #endif { struct rusage ruse; (void) getrusage( RUSAGE_SELF, &ruse ); return( (double)( ruse.ru_utime.tv_sec ) + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); } /* #else #ifdef HPL_STDC_HEADERS double HPL_ptimer_cputime( void ) #else double HPL_ptimer_cputime() #endif { 
return( HPL_PTIMER_ERROR ); } */ #endif /* * End of HPL_ptimer_cputime */ hpcc-1.4.1/hpl/testing/ptimer/HPL_ptimer_walltime.c0000644000000000000000000001011511256503657017136 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Purpose * ======= * * HPL_ptimer_walltime returns the elapsed (wall-clock) time. * * * --------------------------------------------------------------------- */ #if defined( HPL_USE_GETTIMEOFDAY ) #include #include #ifdef HPL_STDC_HEADERS double HPL_ptimer_walltime( void ) #else double HPL_ptimer_walltime() #endif { struct timeval tp; static long start=0, startu; if( !start ) { (void) gettimeofday( &tp, NULL ); start = tp.tv_sec; startu = tp.tv_usec; return( HPL_rzero ); } (void) gettimeofday( &tp, NULL ); return( (double)( tp.tv_sec - start ) + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); } #else #ifdef HPL_STDC_HEADERS double HPL_ptimer_walltime( void ) #else double HPL_ptimer_walltime() #endif { return( MPI_Wtime() ); } #endif /* * End of HPL_ptimer_walltime */ hpcc-1.4.1/hpl/testing/timer/HPL_timer.c0000644000000000000000000002007111256503657014702 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. 
Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * --------------------------------------------------------------------- * Static variables * --------------------------------------------------------------------- */ static int HPL_timer_disabled; static double HPL_timer_cpusec [HPL_NTIMER], HPL_timer_cpustart [HPL_NTIMER], HPL_timer_wallsec [HPL_NTIMER], HPL_timer_wallstart[HPL_NTIMER]; /* * --------------------------------------------------------------------- * User callable functions * --------------------------------------------------------------------- */ #ifdef HPL_STDC_HEADERS void HPL_timer_boot( void ) #else void HPL_timer_boot() #endif { /* * HPL_timer_boot (re)sets all timers to 0, and enables HPL_timer. */ /* * .. Local Variables .. */ int i; /* .. * .. Executable Statements .. */ HPL_timer_disabled = 0; for( i = 0; i < HPL_NTIMER; i++ ) { HPL_timer_cpusec [i] = HPL_timer_wallsec [i] = HPL_rzero; HPL_timer_cpustart[i] = HPL_timer_wallstart[i] = HPL_TIMER_STARTFLAG; } /* * End of HPL_timer_boot */ } #ifdef HPL_STDC_HEADERS void HPL_timer( const int I ) #else void HPL_timer( I ) const int I; #endif { /* * Purpose * ======= * * HPL_timer provides a "stopwatch" functionality cpu/wall timer in * seconds. Up to 64 separate timers can be functioning at once. The * first call starts the timer, and the second stops it. This routine * can be disenabled by calling HPL_timer_disable(), so that calls to * the timer are ignored. This feature can be used to make sure certain * sections of code do not affect timings, even if they call routines * which have HPL_timer calls in them. HPL_timer_enable() will re-enable * the timer functionality. One can retrieve the current value of a * timer by calling * * t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) * * where I is the timer index in [0..64). 
To initialize the timer * functionality, one must have called HPL_timer_boot() prior to any of * the functions mentioned above. * * Arguments * ========= * * I (global input) const int * On entry, I specifies the timer to stop/start. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. */ if( HPL_timer_disabled ) return; /* * If timer has not been started, start it. Otherwise, stop it and add * interval to count */ if( HPL_timer_wallstart[I] == HPL_TIMER_STARTFLAG ) { HPL_timer_wallstart[I] = HPL_timer_walltime(); HPL_timer_cpustart [I] = HPL_timer_cputime (); } else { HPL_timer_cpusec [I] += HPL_timer_cputime () - HPL_timer_cpustart [I]; HPL_timer_wallsec [I] += HPL_timer_walltime() - HPL_timer_wallstart[I]; HPL_timer_wallstart[I] = HPL_TIMER_STARTFLAG; } /* * End of HPL_timer */ } #ifdef HPL_STDC_HEADERS void HPL_timer_enable( void ) #else void HPL_timer_enable() #endif { /* * HPL_timer_enable sets it so calls to HPL_timer are not ignored. */ /* .. * .. Executable Statements .. */ HPL_timer_disabled = 0; return; /* * End of HPL_timer_enable */ } #ifdef HPL_STDC_HEADERS void HPL_timer_disable( void ) #else void HPL_timer_disable() #endif { /* * HPL_timer_disable sets it so calls to HPL_timer are ignored. */ /* .. * .. Executable Statements .. */ HPL_timer_disabled = 1; return; /* * End of HPL_timer_disable */ } #ifdef HPL_STDC_HEADERS double HPL_timer_inquire ( const HPL_T_TIME TMTYPE, const int I ) #else double HPL_timer_inquire( TMTYPE, I ) const int I; const HPL_T_TIME TMTYPE; #endif { /* * Purpose * ======= * * HPL_timer_inquire returns wall- or cpu- time that has accumulated in * timer I. * * Arguments * ========= * * TMTYPE (global input) const HPL_T_TIME * On entry, TMTYPE specifies what time will be returned as fol- * lows * = HPL_WALL_TIME : wall clock time is returned, * = HPL_CPU_TIME : CPU time is returned (default). * * I (global input) const int * On entry, I specifies the timer to return. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ double time; /* .. * .. Executable Statements .. */ /* * If wall- or cpu-time are not available on this machine, return * HPL_TIMER_ERROR */ if( TMTYPE == HPL_WALL_TIME ) { if( HPL_timer_walltime() == HPL_TIMER_ERROR ) time = HPL_TIMER_ERROR; else time = HPL_timer_wallsec[I]; } else { if( HPL_timer_cputime() == HPL_TIMER_ERROR ) time = HPL_TIMER_ERROR; else time = HPL_timer_cpusec [I]; } return( time ); /* * End of HPL_timer_inquire */ } hpcc-1.4.1/hpl/testing/timer/HPL_timer_cputime.c0000644000000000000000000001332511256503657016434 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. 
* * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Purpose * ======= * * HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, * the clock() function is used to return an approximation of processor * time used by the program. The value returned is the CPU time used so * far as a clock_t; to get the number of seconds used, the result is * divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C * standard library. If HPL_USE_TIMES is defined, the times() function * is used instead. This function returns the current process times. * times() returns the number of clock ticks that have elapsed since the * system has been up. Otherwise and by default, the standard library * function getrusage() is used. 
* * --------------------------------------------------------------------- */ #if defined( HPL_USE_CLOCK ) #include #ifdef HPL_STDC_HEADERS double HPL_timer_cputime( void ) #else double HPL_timer_cputime() #endif { static double cps = CLOCKS_PER_SEC; double d; clock_t t1; static clock_t t0 = 0; if( t0 == 0 ) t0 = clock(); t1 = clock() - t0; d = (double)(t1) / cps; return( d ); } #elif defined( HPL_USE_TIMES ) #include #include #ifdef HPL_STDC_HEADERS double HPL_timer_cputime( void ) #else double HPL_timer_cputime() #endif { clock_t t1; struct tms ts; static double ClockTick = HPL_rzero; if( ClockTick == HPL_rzero ) ClockTick = (double)(sysconf(_SC_CLK_TCK)); (void) times( &ts ); return( (double)(ts.tms_utime) / ClockTick ); } #elif defined( HPL_USE_GETPROCESSTIMES ) #include #include #ifdef HPL_STDC_HEADERS double HPL_timer_cputime( void ) #else double HPL_timer_cputime() #endif { FILETIME creation, exit, kernel, user; GetProcessTimes( GetCurrentProcess(), &creation, &exit, &kernel, &user ); return (*(LONGLONG*)&kernel+*(LONGLONG*)&user)*1e-7; } /* #elif defined( HPL_USE_GETRUSAGE ) */ #else #include #include #ifdef HPL_STDC_HEADERS double HPL_timer_cputime( void ) #else double HPL_timer_cputime() #endif { struct rusage ruse; (void) getrusage( RUSAGE_SELF, &ruse ); return( (double)( ruse.ru_utime.tv_sec ) + ( (double)( ruse.ru_utime.tv_usec ) / 1000000.0 ) ); } /* #else #ifdef HPL_STDC_HEADERS double HPL_timer_cputime( void ) #else double HPL_timer_cputime() #endif { return( HPL_TIMER_ERROR ); } */ #endif /* * End of HPL_timer_cputime */ hpcc-1.4.1/hpl/testing/timer/HPL_timer_walltime.c0000644000000000000000000001010611256503657016576 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 2.0 - September 10, 2008 * Antoine P. 
Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratory * (C) Copyright 2000-2008 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratory. * * 4. The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* --------------------------------------------------------------------- */ /* * Include files */ #include "hpl.h" /* * Purpose * ======= * * HPL_timer_walltime returns the elapsed (wall-clock) time. * * * --------------------------------------------------------------------- */ #if defined( HPL_USE_GETTIMEOFDAY ) #include #include #ifdef HPL_STDC_HEADERS double HPL_timer_walltime( void ) #else double HPL_timer_walltime() #endif { struct timeval tp; static long start=0, startu; if( !start ) { (void) gettimeofday( &tp, NULL ); start = tp.tv_sec; startu = tp.tv_usec; return( HPL_rzero ); } (void) gettimeofday( &tp, NULL ); return( (double)( tp.tv_sec - start ) + ( (double)( tp.tv_usec-startu ) / 1000000.0 ) ); } #else #ifdef HPL_STDC_HEADERS double HPL_timer_walltime( void ) #else double HPL_timer_walltime() #endif { return( MPI_Wtime() ); } #endif /* * End of HPL_timer_walltime */ hpcc-1.4.1/hpl/www/1rinM.jpg0000644000000000000000000001443711256503657012443 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' 
",#(7),01444'9=82<.342ÿÀ SŸÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÚ?÷ú(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š¯o}ow=Ü0I¾KID3¤lr‹ õù]O¾¹¬ýÅ:7†"†M^óÈó·˜Ñ"y]‚)w`ˆ mU–Ær2*MKÄš.‘£.±ªZA§ºŽá¥eK ˜ûä¨$É=³Z”QEQEQUìï­õkY<ÈÖY!'ixÝ£qÏ£+|qÅeêþ/Ðô+ß²jWr[¸Hä6ò´Q,ŽQ ’…),¬2Ät­Ê(¢«Þß[éð,×Ryq´±ÂÒrò:ƃVeÙçŠUÕlt=.ãSÔîc¶³·MòÊý2IÀrIdš¯¤x‡L×^î+ ä3Ú:¥ÄÁ$ÄYw.èäU`9<ã¡­J(¢«Ùß[ê4Ö²y‘¬²BNÒ0ñ»FãŸFVøãŠŽëT²²¿±±¸›eÍûº[&Òw•Bì2Î*åQUî/­í'´†y6Iw)†´î¤#ŽŸ*1çÓ×_ß[éšuÍýäž]­¬O4Ï´¨ –8œzU}3YµÕüß³E}•ßk°žÛ9Î1æ¢îéÛ8ã=EhQE^ÎúßP¦µ“Ìe’v‘‡Ú7ú2°÷ÇV]ÏŒ4 ?G¡O©Fšƒ¼qù{ª¼ŒhÎÅvÅUˆ'°9¹EU{‹ë{Ií!žM’]Êa€m'{„iã§ÊŒyôõÅG©jpiVë=ÄwnŒáµ´–᳂yXÕˆuÆ:zН¡ø‡Lñ½Ìú\òJ–× k0’ "håP !WPA‡jÔ¢Šàχõ‹MGį ÙG§O{©År·‰,‹ˆ¾ÎªÃ&ŽD¡Ø†EÉrCrÀ‰gâÝ.âÛWº³“[¼‚ÞîÝ-â½€±óM»!.aBfåf†B¶ëL’‡·~k%¼Ôìü7 “òZRAKsæBX Ömö¹ª|J×$Ñ<)=‡‡,eS×mŸkÎãɶaøeÇ×îàI¡ÿ ³þ§ßÿàãÿ°£þgýO¾9ÿÁÇÿaGü*ÏúŸ|sÿƒþÂøUŸõ>øçÿý…ð«?ê}ñÏþ?û ?áVÔûãŸüö¬ÿ©÷Ç?ø8ÿì(ÿ…YÿSïŽðqÿØQÿ ³þ§ßÿàãÿ°¬?Q×~øÈi¾&Ö.u/ j²eªßHd{Y±Är9û¹ü§ï ¸p4´ß ø§KŠæ×B‚=Ð^Ý?—Õ´ 0iähä -g8òŒkË)1´ 5Z¾Õ%µºÑu'Ó'²5)t§²IuY†7 fgÌAFÌ*F[{ýÕÀ>™EW7ãk^H²£ÕÛ[lË,©:ù„.L~`䌂GzË·±ñ•¬sê1A­yq -ìgÔa+$ÊêË‚,£ÚÃi*ÄÞ[žÒõ ]g\Ô®ÓRŠ ÷FŠNXež& #2£DXt…ŽÒœ·YEWØxsÄ:f’ö^µG ©ßI"ÇqoÏÌZèóäÊ®Â6ãcøWůâk7Ú¬&Öãl°ºùžD~]Àg¦%f1S"#çpF/è”QEr~6Ñ'Õ%Ю¬´Øîîì¯]ÖBbV· ª²0e0±Â±ù Å@¬ÿì¿eêÑj±Ç­A=”‹ÕcQ9l»’Î"„®ðìd€@ÎåÐð}–¯k¬Iw¥k¥Êñ}Š×S¿s£ýën åQŽÌ)‘¹V?.ìWYEW™é¾ñN—Í®…z= ½º.;«h`ÓÈÑÈZÎqå×–R6chG¡x¢­ÄPjQÜK©ÚÏq~54û#Û­¬Q̯ǘìÈê…J6T.+Ó(¢Šäüm¢OªK¡]Yi±ÝÝÙ^»¬„Ä­n Ud `Êac…còŠFŸŒ4ýX–îXïõn[OŠK¨åV”+aNÛ{|Û$ýWôV›‘¥ÛØ@Ò:B˜2JAy[«;çf%˜÷$žõrŠ+Ÿÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*¸-cÇ:Žuû¿ ¯‰­t¿ Ú¦ÝBìÝÇ ê N0³õ|Î3¸|£ ÛŒ¶:Gí2Î;;ˆ“ÚZÇÁâ-$“…’Iük¨ÿ…ykÿCŠÿðw?øÑÿ ò×þ†?ÿàîñ¬ýOÃ^Ñ<¯íoë–v|¿µøŽH·ãÆæÆGOQ\Ouéֺǂ~ ˨ZÎZk[¯Çsñ·óœ… Ÿ›œ¥ð§ã¶³¥ÏcâýbÒßTË¥ÅÆËtž#Œs»Á$c#iÃôOøNüÿC^‡ÿƒøªÐÓ5Ý[ó²u[ÿ'gÙ.]™Î3´œg¯¡­ +‹ñµð÷Å:,úN­â=kI°H”JÊAÈ`CdÀä+ô/‹ÏüO“5캅ìäh¢¶2ÛÏ*ÛžSG̪Wå ·¿.8ëï~%IñÅ2éo‰m¼3ákb¦ãQ’é-î®°sˆ·W$qÀå¾ðŒôßgð7ýëïü*Ïÿ]ü+Ë_úüWÿƒ¹ÿÆøW–¿ô1ø¯ÿsÿgê~ðî‰åkxç\°ó³åý¯ÄrE¿Î70Î2:zŠãüaªØxoFMWÁÞ:ŸRÕ ”1‚ãıÏ”‹–ŠF"N8 ¿6XÓ*×öÔ%ðU­¢ÛÛ?ŠåŸìÞt¤GO—¾vª“’1 
©c´aOO¤Ûx{û=$×þ-KuªÊL·/iâ1*ìrV4V(è8@0£{Lðׇu¿7û'Ç:åÿ“3ìž#’]™Î3µŽ3ƒ×ÐÖ‡ü+Ë_úüWÿƒ¹ÿÆøW–¿ô1ø¯ÿsÿsÿgð7ýëïü*Ïÿ^]oñkÄ~ ñ¤šdšãk^µ¼d +Åu$¶ÅòL¤}‡Œ¶àŒÉþ!Y|Cñt¶Iâá¿ i¤9¸¢ÒçQä°+SŒ€3‚ÃgG‡ƒ.®"··ø“©M<®8ãñK3;€’Iã¹ÿ ò×þ†?ÿàîñ£þå¯ý ~+ÿÁÜÿãYz–ƒá}ámõOk63²Xî¼Jñ1\‘ÁÆAö5çÿµ±á{{ KÁ9»¿ž7ssæx†;µU *³¹c!%‰ÈnÒHèE­ãáÿ„Rõb¹ñiŸìФ˜Š3³ÈØTEÁòW?(9^ŠÈx~êÜ\øƒâ·ªMóÏýŸâ¶·CŒlŽ5`€ÉåŽIÆp74ß hzÍ»\i~3ñô åK_Ë*†À8%XŒàƒqW?á^ZÿÐÇâ¿üÏþ4¼µÿ¡Åø;Ÿük’×Àï*üQÔ@uñ_+î2ä~b¼ãÂ|G xªÇMñf«æ‘bÞâH|«’¹Y|ØÉÞGÊå¸ÝÁzèlðmͼ°?…t`’!F1ÙFŒá”§ÜGjóÿ x[AO\èðèmLJbKÕ¶{ë’y&Š[q&ã bF•ã\³UòÄC↴ãÁ—^‰¦ØÎÞ(±F’ÖÕ"b¹s‚TŒ€qì+Õ(¯3Ô´7Yøú¶ú¦Ÿi}ø\:Çu Ê¡¾ÔF@`FpHϹ®³þOÿЩ¡ÿàºþ&¼ßâ׆ô/ ZèÚþ‡á-*}Mµ²[ChZÕâ› aB›qnÈ^xÅj|.Óôø8k:§…|"'{‡EŽÇOOݪàb@ÀùËc?u”÷­„Ckoã {x£†¼Qz‘Ç…TPà8Åz%äÿ<'á½Oányáý*îêO?|ÓÙG#¶' ±8ºwÂÓü=©ÞÞxOJ6¶ö’Ë0·±‰$(¨Kl` €pA=ÅyÂ]JÃ^ñ ¯‡µ?xsìi,¶—7¶Q‹¹Ñ\…ùö†I ¹ØÌNTƒÝüBðÖƒ£\x2ãKÑ4ÛÛÅ(ÒZÚ¤LW.pJ€q=…z¥æz–“¦ë?VßTÓí/ _ ‡Xî¡YT7ÚˆÈ Î ÷5Ö àÿú4?üCÿÄ×›øëáôZ¸|U¢éÞ ±Ò-­ÛbÕ vŠFºçþ¯øÍqÿµtñÃ[IÒtÝrâú'Ë‹ûí7m™òÑ8òkÔ(¯?ñªÏoñÀڷد§±²ûÚe´³–ãÊß ªäF¬yi:N­¦ë–÷ÐyÞd_Ø·o·tÎÕˆƒÁƒ]‡ü&Z_üúëŸø"½ÿã4Âe¥ÿÏ®¹ÿ‚+ßþ3\Å-]ºçþ¯øÍð™ióë®àŠ÷ÿŒ×'ã}Gþ; Á¥éÚ̯mâ;;©ŒšEÔKJX3–xÀnëÓ(¯;Ô¤—JøØºÅÅŽ¤ú{xpZ‹‹[ ®ÍûIm‡ËFÁÚ3ùzŠé?á2Òÿç×\ÿÁïÿ£þ-/þ}uÏü^ÿñš?á2Òÿç×\ÿÁïÿ¬?…ö×0Ûøª{‹;»T¼ñÝÔêÝág‰Âp®Áúv5ÞQ^_ð·WO|8Òt[M×-ï ó¼È¿±nßné‡+‚»øL´¿ùõ×?ðE{ÿÆhÿ„ËKÿŸ]sÿW¿üf¹?ê?ð‘ÜxN /NÖe{oÙÝLdÒ.¢XâRÁœ³Æpï^™EyÞ¥$ºWÆÅÖ.,u'ÓÛÂÔ\ZØMp¾oÚKl>Z6ÑŸËÔWIÿ –—ÿ>ºçþ¯øÍð™ióë®àŠ÷ÿŒÑÿ –—ÿ>ºçþ¯øÍaü/¶¹†ßÅSÜYÝÚ¥çˆîî Vï Òh…²I<Ûä’U'ËHËì à¢5?ÄÛsüIñN´Ó£¶¶¹ŸO¾¾”ZÁ>¡lÖko¸Óâä uŒ 3–(§Á­ïj2êþ Ðõ‰Ö{‹›ešEn /Àà|Ùàt­ª(¬?_ßi^ Öõ 0Æ/-¬¥š&vÀBªNïºÀ ‚@€r,ñF¯vö0xoûfk{Ë‰Ú $}¨ùKP±]lÜ›¦í˜¨UÇ$wMâê:5ò\Gp—6ñ̳GF²PwbJƒœà’GBjåVŒ¯ï´¯kz†˜c–ÖRÍ;`!U'wÝ`HÁ @9'¶kˆ¬î-5(ï ˆ¤é—oÉœLb S?ÇÀ#žgøv-aüK¨Êþ!»Ô4[dû"Çuoi.AÝ$@QîñÉÞ$zÊ(¢ŠËÕ|9¤k—ój–1Ý›ttD”“+•,3µÆQ  ‚0FkÄ6žðg…ïõ : 2Ìli–ŸfšG„ThŠ¶ã¸Ž£†l¥«?CÖu½3C²³Ò~ßZXGù›ëhÊ©çæV}ÁŽrw|Ù'<æ´?á'ñgýëïüZñtÂOâÏú'×ßø2´ÿâèÿ„ŸÅŸôO¯¿ðeiÿÅÕ=WÇ~!Ñt»JÿÀ:’Z[&ù]/mä*½ÎÔbp:“ŽIàÇð5×Ã<Úv“áûI¤±·…³©Û$²²ÆÐÒvð‡^ЙÎ •Ågèž$ðÿü%7š|+¨k)¦–ÉöÛ(ÉÜaI_ËLîp È/·+’zïøIüYÿDúûÿVŸü]ð“ø³þ‰õ÷þ ­?øº?á'ñgýëïüZñtÂOâÏú'×ßø2´ÿâë‹Öþ$xfÃÅSYøÛÂ:•Ýͼ6ò­Á[›y ]Y9G 
ňeV`wÈ"»‹ÍGÁ¾ðÔúüÚm„茭§Á›³‚cTÛ€ä‚HçrMdø{ÅZÔžÓNð÷PX¶ŒZnÔmÇî‚€Ÿ}÷t“ÉëZ_ð“ø³þ‰õ÷þ ­?øº?á'ñgýëïüZñtÂOâÏú'×ßø2´ÿâë/Äž.Ö ðÖ¤ú×ÃÝHéfÝÒïeü|¦[ˆÜ¶0NHè2xÆj?‡úÃÿù÷Ú/‡ ŽûO•Io¬£7;ŽJÊeËbUŽâŲ¤ž šp]xsLñ¬ºƒü)w­jJJe0^mµÓÚVùÒ1+ùq¹ ä n˜“þÑ>¾ÿÁ•§ÿGü$þ,ÿ¢}}ÿƒ+Oþ.øIüYÿDúûÿVŸü]ð“ø³þ‰õ÷þ ­?øºäõ¯ˆ^“Ķº7Žü%=¥âºG_ˆ§Vb3)%¼½«´ã$|Àcæ¥ñ/x;ÃÉ6¥¦-ºÈ&† .V´kŸ0 •JÄÈHH÷ÈÂÔ€t¬Ti%¾²ŒÜî9+)—,Y‰V;‹Ê’z‚}ÎÆßO¡µË¥’b7—‘ÚG<ú³1öÏUŠ(¢¼ÞëÄž<ÔüwâÃQøqmt³eõ%œ;y±oê‡7aÛ­Xÿ‹¿ÿR7þM×+â[ǯi§ˆõßÚ­™i,ì¯.¦ŠÝ¥y»XH\‚$“ÕÅßÿ©ÿ&ëcá߉/<]àM7\¿ŽÝéƒl®ƒ’z(ï]ErÿËw<„¤:üŸ¼PÛ7S†,AÁÈ-^àm;Ç:O†£°ð¥ÏÃË›\¬“ÛÏs1’\Ìî¼#A€ ܵñ'4Ïø{Cñ,~k]_í8}5g.¾T[ú¹ÀÉ+Ø÷é^‘Ey½×‰[a¶*åJ)×õmKâΣ_j— ংÊÞK‰1tXª)bHÀõÜxkR›Y𮑪\,k=í”7,`… è’N2}MjW;ãnÿÃ޻ԴĶ{Ô’â[Æ<É2Góm ôcÐ×#­èŸ¼G£Ï¤êÖþ¸±Ÿo™ûÔݵƒT‚9ðkÌ$ø[ñáΨkšV¡cÿ’Evö3~ñ-ñ½Øqƒ•;óŒw®ÏÀËâÝ'K@ð¦©ðÖäB†YÞîy¥ðWÚrI8Æ`+®Òµ¿ÙøËLÑüNž6ú„FÚ`Ÿz˜‚g&Cã~•ßQ^ouâOj~;ñ‡á¨ü8¶ºGÙ²ú’μطõCƒ‚°íÖ¹|,ñ÷/`½ÕÂpÝÈMk-Ò–@I C^¬N@žµÏizŒ-þ"‹{Wðäþ"°ò­¬¡ñÌØ•zE,@d†Ç%ƒ–,¡ðkÒõmKâΣ_j— ংÊÞK‰1tXª)bHÀõÜxkR›Y𮑪\,k=í”7,`… è’N2}MjWñĚLJ4íû;¾ÔµXtõûr¹|Àø'ar<ñž+ŸÖôO‰Þ#ÑçÒuk\XÏ·Ì‹}ênÚÁ‡*A€x5æü=ñ×Â'>0µ¹Ñ®ÑgŒ]8C)8o/pÝ· %¶`ô#Ä?µøÞM#Uøu|)[Ïræ=ÙÆàW8<t>•¥á¿xÃþžñDzü‚Ž ’i‹7üõ€L‡ýî1éÍzWŸøKþJ÷Ä_û†é;W W‰üa[{OͨköÒK¤KáË›=6GŒËZ‹#ŽT.Ðsò’=CÁpMkà_[ÜE$3Ŧ[$‘È¥YD ‚ ƒÆ+›ø%ÿ$‡Bÿ·ý(’½¼ÿãoü’wþÝÿô¢:ô ¯co©é×6‘ù–·Q<3&â7#dr2 é^ðvÆßLÓ¼Yag—kkâ[ÈaMÄíE…<œ:ÕÿÉ^øuÿq?ý'Zô +Ïü%ÿ%{â/ýÃ?ô«Ð+Ãþ(j^ ²ñF£¤ÜYéQjú¥¢íbú9.~È0#Qh¬DÞ^XÝŽ’r+×<5Œ>Ò"Òæ’}=,¡[Ydgˆ ØÇÉ\ƒè+“ø%ÿ$‡Bÿ·ý(’½¼ÿãoü’wþÝÿô¢:ô åü}sá«/-ÏŠì~צEw Û4Â7Þ»8Q“œõ®vÖâþ uâ§ÓÚ;è Ÿ]^ /±Er<¯õkPÆwdŽ[ÌöÎç‹ä¯|:ÿ¸Ÿþ“­zçþÿ’½ñþáŸúNÕèàÿíõë•Õ/.|=æè–v‘Ai~n¡Ä.òÆÒJ#Ç™¹Š¤C‘€òãÔÿ¯MGùA]…çþÿ’½ñþáŸúNÕèW?ã¿ù'ž%ÿ°U×þŠj< ÿ$óÃ_ö µÿÑK]yÿÅ?ù’¿ìk±ÿÙëÐ(¢¼ÿþnþåOý»¯@¢Šãï>Aqâ=K\´ñ¹¦Ýj^WÚÊh•ËM‹ÃFOL÷îhÿ„ëþ‡ÿàLüføAn¿èxñ_þÁÿÆhÿ„ëþ‡ÿàLüf¶<-á»?ørÓC°’y-mwìyØ;œä€V=«b±üSá»?xrïC¿’xín¶ox]\`GU«þ[¯ú´Ðì$žK[]ûvÎçg9 ÕjجønÏÅÞ»Ðïäž;[­›ÞÆ×W$ÕGjÇÿ„ëþ‡ÿàLüføAn¿èxñ_þÁÿÆhÿ„ëþ‡ÿàLüf‹?‡Ð[øM×.üA®jWZo›öt½š&Eócp±ƒÓû ì(®>óáô#ÔµËOkšmÖ¥å}¡,¦‰Q¼´Ø¼4dôÏ~æøAn¿èxñ_þÁÿÆhÿ„ëþ‡ÿàLüf¡»øt÷ösÙÝxÏÅ2ÛÏE,ms`ƒû®à×U¤é°èÚ5Ž—nÒ4VñÛÆÒXª(PNÀôr²¼G Ûø›BŸIºžâfhØËnÁdRŽ®¤ꣵb uÿCÇŠÿð&þ3Gü ·_ô*𥧋l¬­î®ï- •â^Á5›ªºÊ‚œ²°þ,ôì+7þ[¯úðÿÛ„ÿÂ0ÿÄþ  0!1A@PB!1"AQ2#a$B0q3 ‘rSRbCs% 
!1"AQaq‘2B#0ð¡ÁÑR3±b‚¢ÂC$P`áñ’Òc“4!1AQaq0‘¡ ±ðÁÑáñÿÚ ÷ðàÊ¥FEúst%¬­Zr––€hETÛ%ô*x»NI• ØäÜ^L™4•‚‹$ …ÅÎd³ ›@x3Ëg¤7G’+~Áô@ò8F©0°•2‡ÀÒ\“€UqiB›ÈY’PäU±}‘dk먹OLfB`y ÌôØ€“žÞNùg±àcíÇ§Éæ§jø¯AeûÏ7ŠiªŸÑbž¨K9 >!E±×‚`3ƒÓXEýdfSÑZË" H}`Œ .2üéõ:CÑev–§$_ÏéÁüž×pþ´ø£o5”oν>¸ÉǪ–þ[[¹o–mg¸õÓ!Ôä¯üƵ‰êr+ß/­ÊíÎJI­ÅºÈƲ;zêEûO”ò[·¿9¯°ôyeZ< $¢q= qii±UÕíšGÏcî—+WùÉkŒ²iÇ®”|ö|_Ëúj×MsÊJuèòÕ]šàø®©zŸ|XwL7F/ÜáÕR««—îg×>[boè3 þwRC«Æ‡šÝ5ÉûK•%3GE¿ÏÃÑ8ôY‘|níqkz+¸=מÿÚôûwícêO×v_²éaQÝAØk=&ÉðáÖoT2¨emZÄNØè+1×´ŒöÍi¤‰öM}­c/)HÊ»º{¶IÜìKyóô&MÇR¤m:ÔH«{H˜Ï°uÃlÕµaè¥mܲvz¶ëðö*òë·ûýUDblšðk$ìºì(ß´k~^¿öµ¯“e×E ›"z”Œõxû+ìÿ²þ«àõód4+ÿK`’ET}"쑇¡ìƒ7TÙ"Ä×wÙ«u}Àû¥.·°~»ëM…Ëõž»6º|AìZnÁúžÇòÐt;o[?Zm3m¦®Õ(/(öj-õ°&SÚÕB¨K¨ û©ôˆš†Ï†§°Öåx °õ½¸ÛíN³±ÂÔ`iW¤}µ‚ý¡èc 0·íÌÑõÿ¢bÙjoWLo©ušý”»,hç#QN¢œ)žD~Ÿ2'A%E :TaãeEr8¢c ÂJŒyQº¢D§IŽÄt¸¬_*7z|=và`ì }ᬮ3Òæ¹-«-*ö8]×B¼_±áñ+ìY¤@Rì[,¸ H±mãeIªml¢E×n‚0ëSÇ%(-Fz&ÕO¢Ÿ-mu¶Ê}ä#e~µ`ÓÚÔ¶m³i¬ÑÂÕmÙ^ºõëjíjìK`Ú«ìA¡·MV±ɺ}›·ê[N±/NÛj`%SÝèsŠ0`…Ùý’«´Õ–öJ¢…·ñ]9›5y+ãÜA“{À]‚ 0›x".Å]Ú‹md³_ׂ{/á=®Û*vÇY^oØaùöj©ÇM¢»â˳ֈ'x_ƒìo­ë÷èZÿúý²ƾ¾TK(#³€êk"d%²cN#Øk&©cLìTÕÒºªª>š¡#_“8©©°•õi5¢¯¡|yzÛæX‹Vp°úlŒºO}²¨Ý%kôáWXÂÓÖ-œ§¯9ϳŸg>ÿíU9Æû8Î=8Ïçœã?¬ã8ôáq3Oüÿ;©l´‰!Õ9@æ-Öæ:ï #f\‹†èâ4’ÁdÜú҇¤„7– »ÄpQ–I)® ³6—8 )E:áÜÐ)i•T ‚Ú{J6Gšp¸ÛREoj¬)ÅV}mp²ë¤iiC æ/VËo¸* 35ݶöÖâò䤻!GHhbØz°¡)•e%×8ò¯¨=Z *îÈ]±uÈþšª4Ë'i˜}÷ÔºTš³ve·á]©(P hkŠNø¹j›µZ ì²gÞ…5Å»V‰®ëK›-DúŒ;Ÿ"àåZ“-ÅJK\ÙÝi¯¥CVyS® „.¹\þYòkÚ‘jãî°ó6SAD-¤Ô+Ó¬ÛfîqYZ{ïs]V2Jp÷À±_€/‹ˆš­ÑJƒÒ°œ››jѰmJå%TfÞ°Îd:L³ÃN–•_Óª<‹žÃn¦[Bi*jÎUpY&Øw&ÇÜÔ«pê ™:ÚÛ¦ë.6–šæa”¸ã¦bvýÆñ_ʾ̚ƒ1U›Ž´ëÎNë™Ãeʲ²•–¢Tg5«Ý|½áæ-…sT8=ÕaP˜ß9ʘx†y3h•Ua%D‡ÝuÏåÞëd¶™øxf^¼±Í ÙO2.w3 ×U˜5pA ¶×[lܘ¼ÓH*ID )ã¢-—Ý[Ö­±­4}$(ªr(FÅÙÉ­1h3 8.3Z¶@æ™rbâ¢h¥Âœu¸»áä ÙI,*-§eÁÙ³š•Bm‹œ“.d}ðñ.È„7öe»ÔÿÚ?þ£0+Ì]»uùí‡HkgíÓóMsš[$×®%„Oó<®véáóö—¡`c…a¬#A)00ƒ0­ˆT)†$ QZa¬!ÇÕ¶ú-?݇Sx„Æ—n:ýñáÝëÛq½P·ž]º_8á°‰w[3í¾sê$õÃ.;²¾-ç.=ÊH–ˆ©²^]Ãõ;D[?Ås›Äýuã»R°ÛIWõ Ncð‹…Ïoξ†„Ó¿|rèâê±À‘×kvc6¬óaøEÆUEÇ"ëžœ]¢nqn°¹Pº[t¨ºÊˆ$ÇX_tr桱{Z1ü½S‹WšQȲ8ƒ®+i¥œïIÕ*eŸT>Ý× *q•aØhÖ¿›¦.8q"U¯¼;=1ƬpÑß^Ÿnì ŽŸÞŸ_­ðÞḘû·?Ä~qâ=Çy>ÆöŒ«dÌ ß—Û wlŽ86¬tE'ÃzLªÚALtêŽ ·‰=8&¸mÈ›WàŠK÷à˜®¨ËIá¸é0m²—9­YiÁ(¤”˜qʶÎ8ÍDBqœ±”0e©èg›m°†¥IÊXÅLLHEš‰Ã^¬GOÛ\áÏ~ 
(}º`rùx§n­qFPòÕ™ßò1O®mÖâÓkXîÁ<ÓÅe(.¢F™.†‚ ¸Á¸ž!¶[ûU.·Á{¸•ª¤Æ¹A´˜Ü«ñùǘ uHáÚ\F‰I _p+%ž­Z¾2’ÊÒ§1ÚR€Ëm4‹¡ó:´a¾Àj-I¹tƒ¨jÂÄg†ò hM_m„p§óKN„Æpâ48¿VĸŬ¥mSÞ–]‰%‡8…qÃP ¸uC©fG¹Ä…ú€ºbÖ_´òìqÃå'7ºžðWÜb—(ï.!%)Co–æmºwã?¨ÿÚ?þ€ñùWç‘}»ŒpâÐÏôD{–w_»ß N“º4§å~O庘L„ëSåþç27Ë(WÅãn_¨Qnšs 0˜Ö¶WØm[#;J& Æv±{ÆdVq¤é3Çõ·rG|U¬û2;wôMÛ®“¦í¼bs™;øü e¨•õTÈäk- >›6Ke2è˜`}J#„²çákš~¹?'MpúM1Z­§s£«]†Z Ʊ3Å?ÈæñT!4{«õQ݉DL2¿Q£Õ\ëèöóŸyŒ`'hhÞqÞª §t¥p5,´››MØS¬éëÅúñIJÖ.­›—ê+!Q–kW¦ËFÔÃw@G߯ÓñÓËbUPêåsCŠÈG1ÆXîEæl0sˆ-ܬYÛ¬¤ä Š&|iMov4»êÚdT:¶™Kî~ä+/ÜrÀ9áÌÃå(e»:O*ÓjÏ€²Rg¶J#—Ï‹øoñ‡çà ÚÕóY ÊSÆW§bÐCb½h² +îJ§V@íÛé릿…üÆ3òúkøÎö¿}ôïÓµêu·l÷i¦»yúpÚìòl=òz‹-F QX¤,u¯¨J& 'œLOÊx©z×a«Ó¾¶º–äªn)zÇTɱ[Æ nºIDzÌq^?/ýÕ&ä«~ú¶Ç :޾¢êht–å¿DGÇŠÞ0‹Š}«˜…åjÜ[ë3a¨!‚Òcn½׈ÛÑ÷kÆ?/Ÿ¹ƒÀ·!ß’‘>C‹¿\Â…–¡§VývÅ{b0¿f» vÏ>)Þ¹žÄU§‘ˆ*Ÿª¤\õ˜mpDk#Ê5çÆgÈq–ªfªaéd­²q÷å5˜Ú‡m•bÊzËŒGÇMxÆy&Aõ±²8Ì~FJõ¤©U¿!YVAe½%‘WoÃtü8ñ¼…gãòœöi8³Ê†I!FŠË Â-S;~„Á mŸYå§ Í77‹^!úBr'z¸ÓiL”l[å› ›‚cl{µ‰ùq^å¼î"½[ŠcéØnF *ÚS-mS–í²µïd5ú£çÅšžÄMêiu‹U!T¬×U`&X6¤[,‚ÂHõDyÏ.'Èâò1”TÆ*ØäìÕ®Ú'jP»“4©ÖpÀ Ó2'VÈ·;ˆ] Ù©pò5µ¨@Ûº[ÓwD2[fvéÏ‹9Íb¦…+RåÞþ¬T«hº:V}™d%OÖÈFÙwG¯û sJjc}dˆ§ô޹&VÈäíeòÛŽ«¹áL½tQ.·/Hž-e+^½ªtà1å‰üX®º÷v^$¾íšÏS~°eÛ<^ñk]®Y µU%ÿÅ“¢;›´'é—ÑßÿÒ?×&„Žö¿q*Ò7±•Ø<ôqOlj»„†Žļª–EoìÜ527)cÖBL!&¾’`gãcUoÕe?â\öÓI³_ÈžÛ“Fºäš[,È·ÚÁä:ýQÀ øÍœë2ÿÅX¯ n:KœE8S†×ud!UÓÕë.J:ƒ§ëÇŽ×=,øë+øÆ_ɬõk<‡Ä±–(¨zDÈcW“y%‡íжë>œñLž© Wü½E_¥…R³xûÉ¡c4Éy‚Y¿u†Xƒ=ó队ã|S¼zí;>-æI~g¸ì!GR ëú VÙÜÖ€x†ºn™ŽQ1Ï/ÇNÚôqØ\ÎÁ Ü; AvÎNÆé§bLj”G´§—.-doÒòJV¿ ŽÄ:3ãʤgLµéPVF¹Töè·0cxéÇ”E_½äXß&ËÆn…¼mª šÖl¤vž@n¾¿n+5êÌHgç¬Gâÿþ¬ÿ(Þ¿÷wVkøMzý~û¾Óìöû6üuÛÆöð 1äß"Ì^cF‹5ÂÙš6©”µ$ØÔCœŒü#á<~kÇlä¯QÿÉ3c £n˺òV´b)3¦§ ¾¾Q½Ü¸ñ©ü›í/ó3¨Ñ±H‹oȬZ~=6͚Ɋ‰ 1Ô0’€˜.^šøeû¸ …_ÿÒñ{nGbßÅåÓ“ï —¿u£¦ðÞ^íºzéüOW)ƒÞÿ"þi{'®›2½Éáú¡-`4žé »¶“:qàøÃŠ´³´¼OÈ+æhÝÈáÐÿÄ\ÈÚµ§2‡b!ÞéàÒnÝ}›c^WÆj]„ÞñÆ?Vô¯¾mžÎí3í ö‡‘ûBbbcAã ‚ ·M¸ñz—œøñüµ»(×í²ð4mŠ_’L¤e}oH>\ãXéþ/vùc?’ly%ŒsO .µ‡* Ñ9ÙüwXŠvJÇAêrÓŒ‘XñÛ©Wó?/ò Xè6Ý wÓUlTZõÒkq¯¿§¬u~í?Œ‹)‰§Œ½çÙ 6KUðŠÍ$[‡¨Q¾A¥&:q0 ùi¯Œ¶Gà¯òlë-Z¾;S8욬 [•XƒVS"mÝ'd¬n=f?ºtãÃiYñ›î„y[¯æ1þv™ªÎµ“zî⤮t¯´ :{§XN@S¬òŸIøñÍÊx«›ýBE~¿YG¤zð'×NÃÝ ]PÚ{"döκÈŽ.:Õ~ží›úëÙ¿Mvîݦí>:Ê-ä¢c–ÍyÆÉ"ˆ=cåÄï°€Û¶ sV;dâH"u.[ÄfcçL”ï).`B'I9й OÇŽ©±b¾SÔ# 
'ÒwLíçÀõ•õ>ìßÿ.³½xÚÛQi®Ö5`Z|ô"‰Óƒá—,ª÷.ë“ÚŒÇ_N*dìÛŦ•Öô‰”C$§Sö;űõ.$éé“@b¦îœÌÏ#ÛÏO^f¸Kù¢ Ëtzê­Kîrùq5û„w¥Uõ t×t¯vý4àg)W •UbÔ&ú(Þ]s=9p-ÈþšéÂÆ¯J+ˆÀ&³¢ ØÂý#¦šG§ø25èÕMü‡K½¸´,lÚ„€­P÷@õ ˆgúÛÇõ:]ÀÖi®Ó[ËÖ>#½q¯éÆGr¹¿z¥º„P˜»]#m,AY¨Ùšo¤ë·ÝÎbx·”·!^æ4èYX%€Ejµ€_S‚Ù‘Û´uÝëËŽXÛ3?«•ÿ³Ä,z+ÌòƒsÉœŠ'üõã¹x;îÌo»pe*úC0;„~¸ÓŠÕ ˆâºˆ2úޱ^âÿÜ[xÊTN’ëX뵕;bZúÌXj_Ü\f7c]ZlbpxÅW°ú×2’e³U†(S~Ù"‰/—¦¯¯)ó Eà|½›‰Ç­(‹g«bz”Æáúä½#ŒÀAP©O,¿_QêfÄÛf"7ýžÜ¾ÔnÛûøÅcâŸkf®Y6VÕƒb„eyM)$ZH¼„}ðO(ç:ñ„]|eÁÊQ©f¾Ö!Ž’}ï]ÅÚ~Ьý7¨uºrãË¥”ÒóÈã±AŽÕö2õZVÒÒOT÷W•±ÚAÞSëÆ·© RŒjeB¹&ºÏ3ݱ G¬¢¼¡™Ögq|çŒ5¨§ù tvmPÞHåõzUž ´Ä¡¹kÊg–ícÓ›A N=964?clkõò)³S÷ÅÜ¡·¨¨Ô4ö”rão²®õÖ}ɺÖ&a%²ºðpÏ{F-é®ÙçÇ—‡`Ýr©ÍÅk‡é3»^•íù1'},˜Pþ“ÆÔË“h|i'¸¯¼éÓ¾VmªJ[JŠ#:nöëÇnêçnÂü·¾Þ³Å±–1êÄö+¹³ sL¤1¨27kÏoÛ”1öÜSÇWot¼5ŒX¥Oë5V]-ªúšëXdH¢6üøµb¥G;,¼ë§|1\±• n.Ð8ò*—N‘ P"#®¿. 8× Ðµ‹d„þ:¡]Ê'tI'Ñ|(W3û©’-~Ñö:öÏ¢¡)…©ë>ºÇ)¥6䪰’÷rÀвUÖŸoSc†tõçÄã›ÜEµPNíÙ5A×GuU‰“½#]9ñ“d.ä#»»Xá±ÄCeig£X;'”|¸·q¥e#Jj÷ mV®ÈÅÈßXÅ%˜45(˜ø ñ °OÓ¶EðªìudÕ²ó®—9«‰…48»Zkäñê7Û#¦ÁR”+s’Éåµã\ºú¸ÇÖ®ÖrU›j´J¦>ÒMË>§ý2ß\ãOÓ‡æH.¯˜Au›Pì6DO6Ä0£]=5á¶¢-¾ºmÅ6:µcp Œ*¹ö?½^ÉøÌð¶ìbúƒÓpJÚ:ü¹æú‚[3`û Œ/|l.eRØF„Ú¤\ãžå—8õ(-rÙúT±ší±8[7NíÔkH­j‹H69n)=³ý…Å\n>¸U¥I!^²ô­K"5HŠ}d§R)ç<ÿ­¼{HvÐi#¨7G#„ÈÏ>1ÏvF™ä1aRpãÚºæ–Ô*m]¤wæDm ×plŸ„ð”H…ª›eãWIœ†Jåk³qcbT(:±úw®ª§ÝLÔVb¾V 5–ÔbãÛuWdLwé¾==4Ÿ^6bìWÇWf!ø7 êŠv,6W‘²‰ &_Vèx«~:Uij æPoªèt·ti1Ûi·oÇׂÄ2åCIYÇØ–'Û1½›áÍ‹Sß·®v`w{v~¼;« `n·Ú±”Çí.Ä‘Z\;KIJ?qL‡R=x°#q5¬3#ÉÕu8êV±³2¹ N¸â8~éê}Á×—Ë‹6n_Gqkǯ෢kHw®êÅ]iÏF9m™÷zëœSÅÙ}REL„Û‘«ŽíÉ*V*IsnÆŽ2±'-ÖåøñŠš9EÍÜj²‰öé“j¾Fá[=è ª`5&\ŠîÓŸ¯²ÅFÍóa²?+´=@¯»Þ3öÐuà¶lõøñfÉߥrÆ2ÍŠQSî™ââ:&ǵfq©}½^<„{º»3Á•1Æí¶’Éz–{Ùî•V:Ð×çSBÜʵÓ2Ë™±~ÄUí°ö:V(Qžƒ¡}|´â‘7% µŽ¡Ž¥QåF"q×alz Ô‹Áˆt,‚gÔwkðã>]ßNs˜Úøùý¾è­ÑUē֎¯R.z{tÓ×…ät¦Wt슉¡ ©iTg¯ì‰}Æ7X54øðü9\¨Ksqíë'Û°¦•µÚ>ë÷Í‹DñT¾Ý¿¯ñ*»=+ytäÔdf²ú ]=:ÿv(@nöúý<¿ÀÿÚ?!ü6ýÌv–Œ„½ƒÏ„`JH—2ÆCg©,Í`:!ø'›r‚Eƒc.W\‰þ)JùPƒqHà`ÀîK éÆ(@ ·åiF§_„ç@qÆCÂ*'=ÑÏc&™u°|„í¦·‚qÔä~µ¡2±iO `ß;)‡Í40O!ÙÑ=Ì÷åCÀd:Á>¡mÑ0œ€FÒýGcV‚‚m€’€zž{0ÓÄÙ ð|ЩÄC’­Õø{–uÔæVR!F4R¼¥`xìpn³¶P È5´ÈuŸ€GÃm$‡)’—’ R¬!=ÎâøC¢è± b6  mƒ<1 ƒSÍS ÆOò€¨IŠ|û-ZÊ_Õj…xs¨¹4¸hõ”£Ž®Îìß!æL4å!‘ ’„Ö¿ È ö±Fm kË\¡7“L‰ ­XÝ{˜ëˆZ‹[Çæ 2^‰ò2—¶1«Å0¯ßšŒâèl”ÔÆ² 
‹p&)^åuQAùÿįvóÄ–mh-û1³ä$I#¹ò(ʇ"÷hª<ȈB~PäÙîº'xÙ9"jÏhfƸéç½ P0]E8aë J%ÎWf«*EÉK]ÌÊŸ”xoBŒ(†Ass0 Ê‘ðµ¨à°u~Uß/Äbÿ'E‘×E¾øc†Úx;cDÍŽÑ"éKJ‰YÍÁ×ÊôYõ­-m&_™ÖØjôutcŠ‚[Ñ0ÖH^ƒ„Í»ˆW<^ëRIƒ2e± ……&œ6W%˜‚Ìj„"ÑØœOfú€m^Utãc4ÄÒ&—1(€À,ÛÝÛÔg‰¥ë‰c¹éq}¢>~nÖc¼á€ Mµ®90ÛU‚˜«ªÁ#( «‚RÀCŒ‰­ï,l]ÙñŽ^æ©ë~–m„îú«|nD{bAbÑŽEeî&¾¥žSÆð·ˆˆ@«Þ±ÐHŽˆ÷²L<¶d !‘ÙKá„ð¸ÇŠóå¢'èN¥vÌPò L!³Ç™!6¶o g'–@)(É‚]BO'Ûo ©ë§ä)ü9r]¶uûË<8‚ŽÄ>¶ç4ë\ Ί~p“-H‡m9×Á<^ÌË5ZÄ~Áª#Þ®¬î44ÞÇva Sð‹Ä÷¬# ØP°HvàÖjÔñ‹†Ek`C¢òb®ß®tW4(ìÄ£´6µ~F˜¶êKÉðGA¼Y.7<æÈƸM™2Dцà†`|«n6!ÿ¬\… ŠåYMfA9Á癘‡Nqá†E¶¯ìžY¦?¡?Øâ†°<~ª4ò¯ :öÁiczbqD5S|¢×š“q žïùoð~¡cW Þ‘Ä`-„D:ËŒ Ò]Ó*õ½¶þwŸ œÒæ0Àk‰p¦…˜'.y¦Ó¸âN–]H'B1§Tà$qËCñrÐ[Äbd|6ØJ86§´ ssJ,—TJÖÙîã®#øÈЪ¦§/Újø…Æ/¨Ûã×ä[›ˆZjC¡€â$bè°K^fÇO‰K’Jš7ƒ4î)¸Žæk¼ïít¶¯»þ™×Œ˜ZŒà|`p›¸fœQ8¼›ålýTP ¾ÎGl4á2õ_s}B–«É‘¸é¿™ú9Ex8tÈàì1ê~\C&`BzZõX.¯Ä‚Ÿ(û ùqoíH]§{XØå¢$ Û®ñ#O˜x²h .=V€ÕŒ”AÜ¡šcÆè`ŠÐ öAw`áž7Œ#èØË+¹€ ¡Å‘ˆ0³Z&²"Š›d»à×jjA&–4äãÄåÿøßÉhA¿gB°=_Ï`Ëå¤VÄ¡ÝÓîÖޘⅠ:ÚÒéDÛž³œðúâeê¹/ÀÿŸÿÚ?!üqÁ;:Á¥ÿJå2™LŒÜ¦Sy@ÊYç>L¦S"Ì®S).]\A=e%ñ”Ê`å2ŸâgÝççåæV¾²±ë.üdÙ€ÀôåïúÃ3i×r¯ï•ýç6ùÈO¬ŸX/í‘!æa½=g‚eNyÊïÎ Wç&Ïœ5Ç+_xœøïæö@šÎU’K)Æ÷àÛLö…by¶X¯X×ÓMÎ_3ãPù ~åá0„6‘u±1wg1*¾0ué¡ãôø|â]âèë‡ÛàîhôˆÎÇÌóë>e˪ú·¿Ð;€ý•ßí–ô’ÐK³Ï§¼\ Á£ßyó“ ì¤þyƸ ¿^óâ]€ÿn{MÄRvïSÍæ5(B±§×‡¿à”Óîxî!˜ÃCl»6®åÆÖžÓþÎ!Ûú_ôÿ¬xÅåŸì¿q÷Uˆ§ðB‡ˆ>¶á&ZeOË+‘cckLßÛ›EÞð¡Q”/¬x}ú‹à<–ÁÃhø0/“®š"wÜ(J$€ëäz"ÛeÃiTܱ;¡_]";ÊÑKh8=ÀY¼¨ Sö.hºówS¡n—[ÁèjçÒ¬ÃÖo*˜ì7Т‡sq«´+ÃXâ«vºŽm döHÉ5¥Z‚»Á®™Â„}ˆXAãq˜†€ª0¹¨_X„¨¨´‡Ê‘¦²’O³MïŽÿU§þr8™k‡ü ~JSà\Ü ¥êåz:˜æ„ »„.Ø´|æ‰#CO"ãWƒk£K»ìb÷woÒ ‡Y8RKÐ>Z׳×J$(ÆÌ}纾 6éÞæâ€$æÛÃɳ *â¨ÂHþÌtá¯*4ùÔîQN)€ Ó ×wŽç¤‡CÉfðçÕ«vƒÈ#£'#´.‚¢xOŒ8Û@’àŒ(I,°7Ñþ«ë借”øÂ6GA¾ª–s Å…@BãÆûÊÌ€F°Cìðü~ï°~sšüµêÉ:_?³¼j¤MR£{ÄF ¾œ˜ çÚ´Óµ9E,ÕÎG¹½òöw‘ŽïµÞÐ:;{¤ö·S-¬[öLÀb­õ¥Šv¬tÎâ”°Æšw÷µ9•^Ã’+Óoo óU9t–«ãßìkQÕ,#¤ÞlŽª>ÏMWìû>r Fû>1wÓÀ™()(Ñì„Ùí3÷oS¥4ÓhnŽÑ¿,š÷o‡È©Ýëú­Ò2TW#Ǽ³Û¿ cÈ_'¦2Ê“üL`|HÿŸÿÚ?!üÍsüfOðŸ™øŽOÄÉ“'â„ÿ>3üW/ûË—53Î\s¹Ö\÷”ü^þ<æ®_õ—.jgœ¹ãò•‘óõîµÜ>>øæ¼Å(òNßÖlm¶°ß´ÿßõ–LTŒB)ëFé¯ËFº.¢‹9ë/Ξ=ýf×AÝ>yŸìŽ<¤ê >óª¯Ãã¿Æ.‰+8÷’úùÊvNÁÖô@¹¨ÚsNï&lŠ7Ãã¿ÇœB!—=ýebδëïÖqc?ždÛáÈ%>_¦…ì:ö"¿xëç=òPz8f¾¸o£×ý§ý0®åöÄ!½þëýgƵ°~{>UЍ C…Y,WÐ#` %Ðn·eÉ ‘Ó,k•ÓÉç EGš}¼’ìËýP í6Ü•Ô>3œ¢8 X㈶õ¼‚ø,: 
…ÇÜ´ŽømÛèõŽ8>ªP¡³ÄÇ‹„=EÀùy.Öñu£cոѣêóФöVÒqïgÎm \›g<Ë®]Ì´4î/«CÞŸ8•ìC¤EBü1|™0¤B§¥WJ2¾0ËJ[ð>0Ÿ3Ó_øëËõMÑü/ôâí7ÙöþLÐ=®i ¸;kÛ©×Æs"ŽÏ"±7¬Eº* ÛöáƒÛàöDn.·‹[רÃ)úI×…~À8xžPò9pmY§°fŸ?¶( %Mú÷ë-¹Í Oüa¡ À ¾QæN@PèT³ÖæÎ\§cÊy˜µÛ€ž -°í{}¦vâTÖ9ØB4Õ¡'Ä|0,ìò‚OzÎ2*¶€È¯ÃX=èhÿ½eZôiû?¢3üRé#„è$Â*¢4óŠÔû*ò¦ÑûzóR{ÀÇÄýïŒ=MÀ }&3[ß'û\ž&‘¨ã웿.·¥NüÁ% å)2|ŸÞIËñ]g½³^œù0ðËÃöÀWä‹Á6"xãÆkÖkâ-ÏI˜)×Í9ý0/´×¤êŽÖzpŠk_´¦Ôæî^!óR€ð‰ÌžpÀØûsšž[¾±¡#²ù<©>-ñ‚hnt·ÍøÍ¯‚/H^u»›ûßèÿÚ R”—mÅÚ¥}Q Aómü÷Æ—±½µ%í.§ðN˜˜ðõ­|94$L<7ï,¾ 2ÕÕ 0 `¸õ-PlÆÿÚ?üÂrh9­ºÅ`ÍÏæä¤W¦¥‡L†|X? Ò?yrÉTdÂúû|† ‰´hªÊ¶Xþæ_Jã-rVTK0}°Î×’­qf¨"}?8P„®ßÉn“m”–·vüãÊî¹ÖEAWf6mP°²—›P°$кÍ»ÁÜ€ÊLý”ƒéiC–íêý¦Xª³•›^MÕl š·o¡eÕŒ³/5cu+Ê!H8+,ó¯š8Å'WdìåðAóõy&]I K d&˜³PÚU¾È8’ð#¯zÐ~äÇ„ ¼U‘PÀºõ`ÝäsÆ–q‚¥Þ€&«p™¼’ÔþM–uãÂaÌ}ç:UUnŸ=š¿!-ñžð Zœ«ÑÁ SùjÛM6»Cx£Ys¸}I ÈÆùv‰¿t@Øf«GYÙ™Q&RüåFâÅ60ÒŒ1ç«?þFÈìi 7"‰>*hy¸H³•£Œ‘\ú‚$y$p›²_’ÀN§£Qußz$ÑoÁ[o,=š¡~, ¼+W¾\ôèg éßJ§T†TÞL2È+ˆÀñuÄLC› `óTÉS.3»Q+ÞW%ìaí‹¡{X¸–ojLs QÁ°àsƨÄl˜FÉâÄnÔõpœë9pÐâ)† ë0tÇg°1~H¿®É?8T¨a JõAIÀ«\F™piQªðÝV#(:Äê­ö"AP k{',PcCHœq<¨Ê£«1õYä§Â*‰qq¢p€†ö!pr¸O¥¥×Z%å˜]Ao]ÎÉY 'K™¨ÄIBiÆ gÝÑÄøCHô‚ªe€„Zaqnü¦`‚ô¢`ì!N’+ â{0sØÂ"¬ŠtktíAèÆ³T6êi'# €4Ã%bÞ› –ÈAPó”]>ÄK([0ŽKiâÚê!‹ÞÛÏÈf… “ô Žúü/#awùòÒÔ\€ˆF3ûºi±r¦ê¤Fm›À6 FŸ\¯ÉõaÈ.aÐé[– /¡°Áµ v½ÙÞP¸#ôÚº¢ó¤7dP(DᆟZ QF^í×–Ø(æ—+8£%@‹W%'Ó2©ÃKi숴éÖmQŒÖËlÄ!Gÿ f6×$ m JßU­×(QìRÄ(ÓÆYçeøB‰¬B¢å:SÛŽ6Ã$ûí¦&DöË=Åm#{Ég†€ \$³BÇ (›Áp»Ièúƒ}®üØê ¼E¡w$Œì„mÜKI ÏXKóï6 @ܰ$€¨e0 Æb¼Mý8sE7‚ŠÅN"fê#NàŠ,è?º T¸™ý $A ­­°ˆi/ø)±º¦<ÿ<¾é2B€&[^#©ÀXÎΈÉÔ$6ï‹­Ånð‘a€·ÊFp¨ŒÕ{˜ËÒ5T•~CÛ_žìPi8pÅ;ïXëVM*¾2üÆ&‡•+>L ܘ ´á¹(´CRëQ¹]êC\Iº9ª¢‡ f‰î]¹¢·S¤È€Ä"ˆ ¾i¦?£dÔ±)1I5à!Á-!Eµ°Pƒl—µæ*Oôüw/BŒ>$.nª áÔÑCA#p^k]èx2ÕŸÃ%¼De e„ß[-0“Ë€Kd§ÖÓ‘8`!m˜ã”xÔ „‰Ù~Ô(HášÏ…RÀPzà9púÕô#eŽÅùŠ’Iqx>Bàå‘û‡È÷$©UZ*ÐÅж«ßg¾nÐÚ]Cµ˜d§*«¯$i^¡È³ Àìã0(Ʊ+Íá⊯UðסF«Ö j§ð|½¼ÐwÑdþxÆjè#ýÿÚ?ü/üc ^üÿXsò°¸°LO©üâ}Bç’Î¥)€ ;&pÙ¼èUNç˜>²EûÄʤ~sä9œþ¥Ä òbeR8Úv^„Á÷<á„ʤÍò”Íÿ ×i×iðæðôâÜ ¦½c‰ñüŠƒÖ)ÅšÊq¾&rÒ}9|WìÿÜ#‹êo4Ò;0Òð û·ÿÌUQ7?ë REuœS{ßùã y¿¶!SŒ¥Øú¸¤š×é¹Nmûóx 󟾲!qS éþ0j_Ž`1”¦}óøÁ2Ð×Åæ£Bõ?÷ »xl&­žŒ]=¢ý¿÷òŠTÿ—+ýµ€*è£#4ª4ËAJˆËƒiC¸  ‘mÉœ¥¢AjÅ:²C6³Ð(ÝÚ¡¹D:éùóŶ]pˆiÇ@¤U]m](k¤ÆWI¤Òиœ#ÐZ/^Hô 
ñ(Z«¤€°°+#ž‡{æ_Av°²b‹ÀIA€‘mdå¨Ù…ˆ°(8Ô^Ë‹M"µunVT¼i!ðÝøÄù{ñ_$ò9z{ÍnÛ£ž0Çx9c-ÜìIÝ8‘6÷tk¸ÊhLý¢Ë47óÒ èzaó%º1߱ɎçGCµÞ(}B­ðoú˜euV•¡(;ÐpÅÀ ùäQp„"„ã[¬q0ñhbF^¬b§{Tu}B±­)g%˜!$¬L‹-e¦ $(ÛsîpÒŒmz¦›!xÒ,—è)Ø‹‰DäLÀ7ãñ݃BL àY‚h?¤Q"nŒÄ$㢖U˜³z^ 5…‰ÏܶQ`3ˆ€….Íã“  Ô\¼U”{[¯ŸÇ=Atšƒ5ñÏÏQçc‘+] ð *µ!;Æ# —щ„)€! áäZ7]Œ¶‚:÷ª¨P™€C¡qÒ °i2ÞZÆ/‘ ­7r¥Ä" ”pµ¡ø´Ëüa…Ð3MFÈ€Æ =à vzV{„G¢äíEëo"ï Ëp".)0ÒÕ¥ä~ªbN4òϬgE5‡nsšiÌtÖžA Ô%©YBð%C´rTbÛ6ˆ²á±|’¯åõâ9mYã¨@½ˆì´àâMw\øEɃfàuèðû+gÑèa]€øÀpüø"ÇKFéa™9e5±éÙõ]zLh’ W)*kñ #Ì ø¨Òal¹~|¤”ª ÛF •å#Þà–!C·&ÂoÜéBZ "ìƒñ•Àâ0¹*“ºáQÖ‰‚Ðø™ë9Ã/qÙnî¯T"Î6 ¼Lù°RùTk‘„€ÝjÉl÷¼0úgL‚‘À(y%ö¨R¶ ²Í8·Â@)n 1Åø€Qd5i€D5ò"‚u$¿·1§P0 øÎí:rQÔ**6ÚO&ÀŸÐŸÿÚ?üw<ÄzøÄŒ&*Ãy^²/ ŽGÖX£ë#ˆÈ•fF_ð9\›ÈåK󊰮W&UÎFéä26yÈúÈöb†yÈÙ7‘³Ïø¾<æñ‡Wòw"¿)‘áÀ=Í܈ûÅÐ8¥7¦NŸ>qj_œÐ7"_¦DCë(»òbˆ—ÍÄpö`о.:ßÞy.Eï¦! ø7ˆÜøÅ >1JoLF÷¹ƒß“_“ࡪžÿÛ¬›j—(‚®¡"ê+K…РŠbÆžB¶ÐeÕv)|㑧Ÿ²€ýÖ¼XšD€*` NHŒt˜tUàWÁŠ‹ D×d5´:tó€* éÆž8t¼3Ób6JUö“?¸‚–Y,º¼¹ÉÕúŸ¾BÕh"¤JpÄP ,XklÜìÍÖÔhî‚ëÎ…óÝ sÎmbJ%õA3Ë¥ÁH¯‰ß!·Ø¼‚Uðtó€ždµ Ö¯.°_6õäò÷0P+a½lšmu/qY+BK; Yæsôžv;ý óÇäR®hËâ•:"‰«¸ß¡óÖ:¬xPùé>?¿Æ$òEŠ_äÁ¡›B? ¨ú¯¬Ýе{ §¶‡šô‚Ã㔘•Ò– <æµÉBˆ7@+lÞ*á™ÐäŠxkbÛò¤",åæ4ë1Õ£%•j,Pú€E¹X Ý KG(o¬)é›,Ä´`‰Y™] ãžkMˆviÇô•ÛÕMÔS72¬ì,-ŠÃ¦Ì)YçÝSPøAÀ8³èa°VU/ˆåöB€AchòÒH#gnXKI}QË“V9Û@BJŽ_ADHAÓ°<¶N½ýPàn“Ô1æŒ@½*ýÓòŒË7Qû\gJí ¬`Ö  „ÞA! ÍBE!\ýä_˱iMtÀƒD–¹Ò’‹‘0éD4Eûdݼà €DíÎÉ}8-AL1-ª‘§ÌÁ1ÖM’tCªQ\fj,Ó¯!˜‹àA4Òk½™ä¬ jFÈØ ƒ˜S… Žö/ w@”£Žö3sγôš•x¸iŽÜ*°‰hÇEªØôžõˆŒˆ•Cuü¼a6ÐþÚÓÀÁ¿óºT‘Ü0vÌE$EÔ|Ø| éÇylmÈi='E—Æ&hÞ*µïäì~ÊSe¦8 ÅÀ¶^ÁžbV™VÕyΤI!_lR·Óà†ˆÜRtÒ€CZ x è$øÝÛÁ$mÁƒlŒu®˜X¨xeÆ€l ¶/1Ãɨ"©*jºnïð;I |}LH¼öV,@dÕ}påMàÔ !Ú•(†Í+è$y9ˆýlj­£`¢`v®e~¬+2:…óÇ“w5Nªa²Ø­éÅ2³Ár¥š ãMë¸üŽ$Jxho,Îû&Ïg…€áá^Üô…ë|%<ó+šIæ?ÏÿÙhpcc-1.4.1/hpl/www/2rinM.jpg0000644000000000000000000001512711256503657012441 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' 
",#(7),01444'9=82<.342ÿÀ ^¢ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÚ?÷ú(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š+ŸñŽ£ªiš,é \6¡g yÒì$Œì½¸)ã€Å‡*±câ[ Bò;XmõT‘óƒ>“u ywŒ(éÜóÓ­s~Õ5û‹ÝmWVŽö sG}KÈ«´pñ‘•9)¶à/ϸæ<çæ"¶4‹Íi¼k¯éúŤ–pÛÚÏb¶ñ*’4àïÉ$¾cƒ‚’+Ÿ»Öuõñ¹±³Õ$¹¸]N&•kn³ÛÛØ2`¼óÐÅ)Û$w·;+šî5]VÇCÒî5=Næ;k;tß,¯ÑGó$œ$I®Ç⌵‹8õ#á´óé·{i§Õ¡ä'kØer0{Žx$`›ð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øQÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…ð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øQÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…ð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿ©¯Å{½#ĶZO|1'‡"¾B`¼{下v@Ã2¨ =NNÜ© ¸zeQEQEQEQEQEQEOTÒí5›²½ItÝÊñ0dpêC! e Ž•NÇÂ~Ó/#¼°ðþ•iuvM”qºäpÀdd?ÏŸÀšJ¬Ï§yöSy5ÄWïŽÞ9D†Žða€eÛP28à ÔÔôU½·ÕÒy,µ û/±ý¶2Û¢Iå°ŽU¥sÆ=xÁÔt¿ x2ÖõKP±‹ÏYIµ‹·J24í@6àåWÀ¯?Öâñ׌üMiªßøæóÃV„Éc£Ü_Ãk½ú n‰$ã?! ã‘»eÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…ð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øT2|Eñuk'м.¤K'—6¢ºŠ]%¾xRá€X¨É#¯~•{^ñÆŸã êÒçíÚmÖ™rñXÙ¨y/|É•QˆVÞeë»+WÄž-ŠãÂóÜj1êþ¶†æßÌk´X¦¼]ÛŒ4s¬Û0Xœ*’OeÒøsw-î‡}1¿ûU©Ô&û">¢—ÒÛÃò‘’¡ ¶K0œªº‚ÇØQEç^ñާecumy£kZÝÄ:ìou§Û¡Œbæ@ç›pÀ ÙJŽq“CS½k?éöÖšö¥}©Í¬ywêŸd€• ØX$F6É•FåÚeÉÆÚõJ+ÄþÒü]¡Í¤jðy¶òr¬¼YFÐ1(ÿX0ø±]%Ts‰šÞU·’8ç(Do"UlpJ‚ 푟Q^wá?Þ¯ƒôa.‰®k’ýŠú…Œ1¼r¶Á‘¹¦Ü]NUÉÆ][ÐZÔõ-ROˆú7™öËM6ö³†8üЗ»íewyݤ+¤a98Û##¾¢Š+‹ñ6µ}¢øãGd’K‹9tû¼i–ñæk™H>æéKÛ‡UIzîùrüIâgÔììLö·ÞŠ+Õ2l_¶™Ú˜eð †ÚØ8ÎÞõÕx.ú=OÁúmìV·–©4eü«Ë—¸|Ç'ÍbK©<«g•*FÞ¢ŠŽq3[ʶòGåäBê­Ž PA#=²3ê+ðGŒ¯.|9áûiô½_Tym­ÒMVÞ$krÌ«¸³4¥‰BJ¹Æw+p:V_†u»«Ÿ‰RE>¥yu$ך”M ¶¦²ÇoRlgµ(<… ²+fo˜ø­EQEcøŸÄú_„t9µ}^*Þ>W—•ÏDAÝŽäI G•¯†¾%x£Ä¶^4¼´ðüF$-§i:´“°°‚¬Q7IÎHáJ¨^³þ.ÿýHßù7Gü]ÿú‘¿ònø»ÿõ#äÝG<Ÿ¡·–T‡ÁSº!eŠ3rÈtn dôäêEeü=ø×oã=rÇBºÓ~Ç}5£;J®JI:ä”EÁ”òÍÆ óÁ>±EU]KN´ÕôÛ:þžÒæ3±±#r‘ƒÈä}G#µxMι®üÔ[I’?íÏ Þï—LYgÉ ®vœcpÈÆÒHa´–Zõ‹¯ˆ²ð4.¸ºÛ¦Ï¼J ´Žäªž ¶H‘Éè"ø­â­-u‹ /Ãu…Öµ‹QûGšÑ |‡ÐásŒ€Ôÿ‹¿ÿR7þMÑÿþ¤oü›£þ.ÿýHßù7Gü]ÿú‘¿ònªü;øµmãJmP‚ÚÇW‚1…ŠäIÓ©a'”FAÃæl†8ÈRÇÒ¨¢Š§ªéV:æ—q¦jvÑÜÙÜ&Éb~Œ?˜ à‚90Ex]ι®üÔ[I’?íÏ Þï—LYgÉ 
®vœcpÈÆÒHa´–Zô­Sâ~‡aðæ/Â%žÖàmµ·aåÉ$¹#Ë9à`«dŒŒ)#wÉÒ¯þ1jz]½óØøFÄΛþÏv·+*Û‚’Ç8ÎFpprÏø»ÿõ#äÝñwÿêFÿɺ?âïÿÔÿ“u^þóâæŸ§\ÞÊ< c·‰åa¾å2~g!GN¬@ÈOáŸÆX|sªÿOŽÃPâDtœ2\2ýðªpTÿ\·²~^u5Ÿë—ŠÃ>µ±½¾³Ëjw×ÅþÉiÁÄd¡É‘Ðg# °ÿ‹¿ÿR7þMÑÿþ¤oü›£þ.ÿýHßù7Gü]ÿú‘¿òn¹=k㉼¬Ýi^0ÐôÛ«†·Ž{'ÒæhãÁÜ_q†þr˜Á wž+ø‘¡x[ÂVúü“}¥o¢iöé•{­Ê`•\KÆz@8zUÿÆ-OK·¾{Ø™ÓÙîÖåeAÛpR@8çÈÎ@¹ÿþ¤oü›£þ.ÿýHßù7Gü]ÿú‘¿òn«ßÞ|\Óôë›ÙGŒvñ<¬7ܦB‚OÌä(éÕˆ¹ð·â’|BŽîÚæÎ+-FÎ8ÙÑ&Ü'!ò`29Æåäæ½Š(¢¼ßÄ—^*Ôþ)Çá­Äߨ¶«¢ ö?`Šçsùæ3÷ùwíÓš±ÿ—Ä?ú)ÿù@·ÿä|m¡êZ5Æ«x³âš£ÚÎÍ`ÏáÔ‘V\ ¶Ä$F tZVã]sK·ÔôÏ‹1ÜÙÜ&ø¥Mß ?<‚AAV§Ã-[YÔôízsRþѺÓu»‹¸òÉ@>T$žýz×qXþ,¾¸Ó<®_ÙÉåÝZé÷BûAÚë8<:סh¿õ¿iš·ü,Ÿ'íÖ‘\ù_ØVí³zÛœŒã8Îr÷ß³Ö£%䚥¯‹ RLI‘¥­¢,ŠÌyOˆùæUÏ|Z»ñ§‹þÓ ·ÄÏ.òÎ(^ò4ѬçŽ7“~eʾ6g#³ á·*èkcÇ~Ô|75玿µ-oõ»[ ­ÿ²`ƒ)!%¾a“ÑHãzñ^±Ey¿‰.¼U©üSÃZ‰¿±mWDì~ÁÎçóÌgïò2ïÛ§5âOƒZ÷‹¼³®xâ ¹#ÀY¿°aŽ@p»Ñƒmù‰Ûœdç¯0ñÃÛx£MÒõßÁšûî­eFçÊp--±$r»A÷y`€qìzVã]sK·ÔôÏ‹1ÜÙÜ&ø¥Mß ?<‚AAV§Ã-[YÔôízsRþѺÓu»‹¸òÉ@>T$žýz×qXþ,¾¸Ó<®_ÙÉåÝZé÷BûAÚë8<:סh¿õ¿iš·ü,Ÿ'íÖ‘\ù_ØVí³zÛœŒã8Îqž#øG©ø:ÞóÆÖ~1´ƒPÓ\GåéÑÙFHv€°.ݸrØ?xš×ð^­âOÛªéßãþÐKtšæÈ膄°€-·xVà²ñÓ¦EjkcÇ~Ô|75玿µ-oõ»[ ­ÿ²`ƒ)!%¾a“ÑHãzñ^±Ey¿‰.¼U©üSÃZ‰¿±mWDì~ÁÎçóÌgïò2ïÛ§5—â_„^$ñ…¼kÞ>Žñ rñ¡ÂŒ„Œ2¸8Ñö|ù~fí™çnsŒóŠáþ É!пíãÿJ$¯@¯?øÛÿ$‡]ÿ·ý(޽£žF†ÞYR'tBËeC9îÄ žœ=H¯øm 6™ãÍ ãAð׉ô˜>Å“«[ý¢Æ}¾d[Ù7m`Õ Ž@<ãÿáI|<ÿ¡{ÿ'n?øåð¤¾нÿ“·ürøR_?è^ÿÉÛþ9Z'ÂßøsXƒVÒto³ßA»Ë—íS>ÝÊTðÎAà‘ȮŠ(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢¿ÿÙhpcc-1.4.1/hpl/www/2ring.jpg0000644000000000000000000001602711256503657012473 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÀ P¢ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÚ?÷ú(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¬½Ä:o‰l$½Òîcžî%·fGVù‘Êçå$`€z«)ïYúÿ,|;zÖÓÙjW"qwy5¥¿˜–pØ$äd9†lFç«Úçˆôÿ=oŒ»õ Ȭ­Ö8Ën–Fz ·$p§ü*ѯô«+Ý8§Õ. 
½²¶¡2å‚Ï2ƒŒ€¼gætè×>|6Эíü)ww=ÕÂÛ[[Z\ÎÒK#p3(P«1f §š4?…ß µÛ{—O ]ÚOkpÖ×6×w3¬‘H JT‚¬¬I0æµ?áI|<ÿ¡{ÿ'n?øåð¤¾нÿ“·ür¹Ÿx£Qø]âüãYôII6±!ùV<àG#v ýÃà*Wاžn.%Ž"BòI#TP2I'€ç5OCÖlüC¡Ùjö¾Öî%•2A+žªØ$ ŒðAŸkâëKÝz}.ÞÒîXá»k&¾O- óÖ/5£áË‚ ’ sž+ ¢Š+.é¶Þ%³ðü·1®¡wo%ÄQ—PJ¡QŒg99b09¿÷h×5Èt+{g{[»¹î®ÚÚÚÒ0ÒK#p2B€Y‹1<Ñ¡ëë¶÷.–·v“ÚÜ5µÍµÜadŠEàà• «+RA 9­J(¦É"EI#ª"ÌÌpI5GCÖlüC¡Ùjö¾Öî%•2A+žªØ$ ŒðA]â¬Ì ¼ê½ÿ.îl§³‡Äw~]õ¢XêÝ“ÜMÉ#amTb&‘ n6à2cñ–­ý•£ê×Þ$º‚ËIµÔ,åÓÄQošçÊh§+÷þfwW@0»B–9Ž?BÕ4OL%Ó[NÖlb¼µc»dƒî¶ܤr§Œ‚&¼ž-c[ø3¯A¦k·7:§‚.ˆŠÊùÆù,qÑ'ø{¨Ê†Jí4ÿÍ7‡´hbñލ‘ZØEoºKG3Æ£ä“ $ä©\Ø8È¥ ÓdðÞ«­â/ØI`’„·:’4L„DQB¹˜F$#{3òÛŸ À]øNüÿC^‡ÿƒøª?á;ðý zþ aÿâ¨ÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*²ï[Jñ–·hÚŒ­kkIâ™tÉã–ãËw…·#!0Ñ $«Ž \‹ÂÚµ…œ«§øÃU{¦–)MI"¹@ªNä*Öœ0n‚0AÏðæ£á?XϤËâ½ %K‡/n—i ví”HšFhÆT’ œñœ øNüÿC^‡ÿƒøª?á;ðý zþ aÿâ¨ÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*±ì>^Úé–v’xÏ\ŒÛ[Çn«`c†±¨E!\‚B‚~c–$Œ$šG…<âMZ]RÓHó옮ïJý¦Yž&y3#üÄlŒ¼½6ŠÔÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*øNüÿC^‡ÿƒøª?á;ðý zþ aÿâ«.õ´¯kv¡øÊÑ&¶´ž)—Lž9n<·x[r0b J°!ˆàB^øææÒÄjIæf‡ÄPßÂÿ!Qò…”ŒäaÔƒxßðΈ|9á»!¯e½6±ì3Ê-É8 8U¨ûªç­E^þúßLÓ®oï$òímby¦}¤íE±ÀäàÒ¸ø]¿?èaÿÉ+þ7Gü.߇Ÿô0ÿä•Çÿ¯?µø™àÿx¢}WÆWA¤Øî‹IÒn-dŸqaóO0Td,FRH^xÈ ÝE‡‰> êzµ…އ%ÕÔ© )ý†FçbŒ˜°2Hë]Çü žÿ¡SCÿÁt?üMð‚x?þ…MÿÐÿñ5Ëø’çá/„uì5Í'C´º’!2§ö0“(Iå##ªŸÊ¼ƒâž¥àMGìz¯¬l®¡ŠXn µµžÙ§G€ Fî´›²FTãžßuHþi¾´“ì:‚¢K«9â+a€…B.àÁNÒ@$ÈË7Ë×øcUø!áÝ ®¬uKóMw{¤K#Èç©£;WŽt÷$“èÓ~x»N’ÿCÐ4;»Xå0³ÿe$xp#€ôaùÖÇü žÿ¡SCÿÁt?üMW¿ð—ôÍ:æþóÃv¶±<Ó?ölGj(%ŽäàÒ¼þÿÅ¿'Ó®bŠÏCy'U_ì‰aÉ €<Ä„²¼£#¨æ¼ƒáߎŸá׋e¸íúdÙ‚é`fQ"âT ·æ‘¸‡ËœŽ³Mñ—†üoãñÄM^;{+'+¦èkÒÆs•BtÏw#BŸC°ñ'Á]OQ¶°³±Ð人•!…?°ÈÜì@Q“Ik¸ÿ„Áÿô*hø.‡ÿ‰£þOÿЩ¡ÿàºþ&¹\ü%ðŽ£†¹¤èv—RD&TþÆe ¤duSùW”|S×< ¨éÖrø}*Îê4Oö[)íf™(¥cTe*_vóÐ`H=ÃŒ~Óü e§x›Vû=õ—ú^ÇåçÔöð»~ÐÃÿ’Wünº ø§Fñv%þ‡yö»Xå0³ùOHÀz0üëb«ßß[éšuÍýäž]­¬O4Ï´¨ –8œzWê¿>ëš]Æ™©êñÜÙÜ&Éb{+Œ0ÿ¾28 ŽAŒ^ðÿÆÓ|=ñÚ’âK½-ÜÅy±!nc êNåÈSÔ»vÚ/Žü9âïMâ_ˆZ•´V–¡áÓtGµ–â8Ã]°…OÏ,ÜáB¨¯EÑ5ƒ¾#Ö Òt7C¸¾Ÿw—ö.ÍÛT±å¢p ä×aÿ'ƒÿèTÐÿð]ÿGü žÿ¡SCÿÁt?üMrþ$¹øKáF; sIÐí.¤ˆL©ýŒ$Ê@9HÈê§ò¯øµàÝ[Q²¼ðséQ[ÇŠXm,æ·‘Ø–;Š”Xö´d|Ùcœ€1ÒEñ¶çQðváèI¥jŸgÔµÉQäò¢^F³´¬ eŽ0Äãܽ>•âZN—o`ƒM¹¦ß>ïH’id=K3´Y$ŸÀtwš'‡þøGƒVÒ|9¡ÜXϻ˗û.4ÝµŠž‚9¡ÿ'ƒÿèTÐÿð]ÿUïü%à}3N¹¿¼ðƇ­¬O4Ïý›ÚŠ c€¹8ô¯=Ô¼UðFóJ¼¶‚)fãIE‘ ±R ±d`÷Šòføâ NK†“W³¸·ò?väoh† `λ‚òã 
ÙÁ*ºO x—ÁºÞ¹qâï‰:Üš¤¹ŽÛL{)¤‚Ö>@Êì*zœ.HÜIs•ô}Wø;â=b 'IÓt;‹é÷yqbìݵKZ žMvð‚x?þ…MÿÐÿñ4 àÿú4?üCÿÄ×­êÿ|9¬O¤êÚn‡o}ß2/ì]ûw(aÊÄAàƒÁ¯øµàÝ[Q²¼ðséQ[ÇŠXm,æ·‘Ø–;Š”Xö´d|Ùcœ€1éÿþ8è'ÂVÖþ+Ծ˩ÚâÆ)¦7ª6ÊÄ+|ÇrNH'Œàuð»~ÐÃÿ’Wün» [Ó¼G£Á«i7h±Ÿw—.ÆMÛX©á€#G"´(®ÇòO)ÿÌ•ÿc]þÏ^EyÿüÜ/ýÊŸûw^^WñªWKÑ?á)ÐÍ¥¥Í¥»ZÜÞ‰$K¡¯ªG´ía–cóçiù“ Íz¥yÿÂÏùìk¾ÿÙ+Ð+Ÿñßü“ÏÿØ*ëÿE5ÿ’yá¯ûZÿ襭 rÃûOC½³–7rIyPßÇæ@ÒSÌ^ê)8çŽ9¯/ðø—@ñF“áh¼Qˆ´ÛKIT†(¦–@Äiçg,Ûà CÈU? ÛÐ|Sÿ™+þÆ»ýž½Šóÿù¸_û•?öî½¼↛¯èå׎ôMB{g]=l¶Xé‹tä2Fi·œ$cb0GLqóz¦“<×Z5ÅĶ“O-¼o$–lZb “<”'}1\_ÂÏùìk¾ÿÙ+Ð+Ÿñßü“ÏÿØ*ëÿE5ÿ’yá¯ûZÿ襭MV 6çK¸Mb+Itð›ç[µVˆ*üÙmÜ`c9=1šñ߆óÝ|FŽÎÛWc¢I¡É%†–.üųAqˆãoÞ>eò€c“•·T×x·þJ÷ïû‰ÿé:× Q\ýŸü”=gþÁVú6îº ò?Šö¾(Ó§y©hÝ\D^k Cóî`É´>B dvç½Xðoü€îì+©élÕÐW?ã¿ù'ž%ÿ°U×þŠj< ÿ$óÃ_ö µÿÑKYÿõ }/áž·uu§A¨Â"XÍ´ä„b㟔°n?/Gð‡Ä1—\½ðÿŒîç[ˆtønm-®íq3Dx2yƒÆT0|±cÔpzÿÉ^øuÿq?ý'Zô +Ïü%ÿ%{â/ýÃ?ô«Ð+Ë~!ÙÞhþ&µÖ4OE¡êz¸‹NŠÉlS}>ü+ÈIè¡*Å`d¶ÓéVÜ[éÖÐÞ]}®ê8‘&¸òÄ~k€>ÑÂääàtÍpÿ¿äè_öñÿ¥W Q\ÿŽÿäžx—þÁW_ú)¨ð'ü“Ï Ø*×ÿE-tÆé ¼?¡ø ëvò $’htñ>- •Ô#È‘Á+‘É €~)ÿÌ•ÿc]þÏ^EyÿüÜ/ýÊŸûw^Xþ)ðÝŸ‹¼9w¡ßÉi:Ι§kÓkšoöuÖ¥­Ü_­¿ž“mIó!ÁÁvéÒ»ŠÇñeƧàÝrÂÎ?2êëO¸†ÜçhØ(Éàd‘Ö¸} Zø‡¢x{LÒá[yßa´ŠÛÍþÝ·]û.ì`ã8Î2kCþ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿ±õ³ã¿j>†óÀ¿Ùv¶Ý­ü×ÚÐO„Œß(ÁèÄñž9¯X¢¼ßÄ–¾*Ó>)Çâ]Ã?ÛV­¢ oŠÛkùæC÷ù8vï׊±ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü*ÇÃ-'YÓ4ízmsMþκԵ»‹õ·óÒm© B>d88 ŽÝ:WqXþ,±¸Ôü®XYÇæ]]i÷›€Üí< ’:סk_ôOišOü+o;ì6‘[y¿Û¶ë¿bÝŒgÆMhÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øV|QxËÄü+«jÞþƱÒ~׿Ký§ Æï6£…Á€8¯lW¨Q\>½sâ­ÆW7ú„ÿ¶ínôûhYÿ´b¶òÞ9'$aòND£üô¯ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü+ ð]¾£oá¤þÖ±û ô×ww2Zù«/•æÜI ]ëÃpã‘úWAXþ,±¸Ôü®XYÇæ]]i÷›€Üí< ’:סk_ôOišOü+o;ì6‘[y¿Û¶ë¿bÝŒgÆMhÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øV|QxËÄü+«jÞþƱÒ~׿Ký§ Æï6£…Á€8¯lW¨Q^_,^2ðçÄjÚOƒÿ¶luo²yrÿiÃo·Ê‡iá²O$Ž@éß5¡ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü+Cán‰¨øsáÆ“¤êÖÿg¾ƒÎó"Þ¯·tÎÕ$<ì(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢Š(¢ŠÿÙhpcc-1.4.1/hpl/www/HPL_abort.html0000644000000000000000000000313611256503657013445 00000000000000 HPL_abort HPL 2.0 Library Functions September 10, 2008

Name

HPL_abort halts execution.

Synopsis

#include "hpl.h"

void HPL_abort( int LINE, const char * SRNAME, const char * FORM, ... );

Description

HPL_abort displays an error message on stderr and halts execution.

Arguments

LINE    (local input)                 int
        On entry,  LINE  specifies the line  number in the file where
        the  error has  occurred.  When  LINE  is not a positive line
        number, it is ignored.
SRNAME  (local input)                 const char *
        On entry, SRNAME  should  be the name of the routine  calling
        this error handler.
FORM    (local input)                 const char *
        On entry, FORM specifies the format, i.e., how the subsequent
        arguments are converted for output.
        (local input)                 ...
        On entry,  ...  is the list of arguments to be printed within
        the format string.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* Report the current source line and file, print "Halt." and stop. */
   HPL_abort( __LINE__, __FILE__, "Halt.\n" );

   /* Retained from the original example as a conventional program exit. */
   exit(0);
   return 0;
}

See Also

HPL_fprintf, HPL_warn. hpcc-1.4.1/hpl/www/HPL_all_reduce.html0000644000000000000000000000347411256503657014442 00000000000000 HPL_all_reduce HPL 2.0 Library Functions September 10, 2008

Name

HPL_all_reduce All reduce operation.

Synopsis

#include "hpl.h"

int HPL_all_reduce( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const HPL_T_OP OP, MPI_Comm COMM );

Description

HPL_all_reduce performs a global reduce operation across all processes of a group leaving the results on all processes.

Arguments

BUFFER  (local input/global output)   void *
        On entry,  BUFFER  points to  the  buffer to be combined.  On
        exit, this array contains the combined data and  is identical
        on all processes in the group.
COUNT   (global input)                const int
        On entry,  COUNT  indicates the number of entries in  BUFFER.
        COUNT must be at least zero.
DTYPE   (global input)                const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.
OP      (global input)                const HPL_T_OP 
        On entry, OP is a pointer to the local combine function.
COMM    (global/local input)          MPI_Comm
        The MPI communicator identifying the process collection.

See Also

HPL_broadcast, HPL_reduce, HPL_barrier, HPL_min, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_barrier.html0000644000000000000000000000177411256503657013772 00000000000000 HPL_barrier HPL 2.0 Library Functions September 10, 2008

Name

HPL_barrier Barrier operation.

Synopsis

#include "hpl.h"

int HPL_barrier( MPI_Comm COMM );

Description

HPL_barrier blocks the caller until all process members have called it. The call returns at any process only after all group members have entered the call.

Arguments

COMM    (global/local input)          MPI_Comm
        The MPI communicator identifying the process collection.

See Also

HPL_broadcast, HPL_reduce, HPL_all_reduce, HPL_min, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_bcast.html0000644000000000000000000000225511256503657013433 00000000000000 HPL_bcast HPL 2.0 Library Functions September 10, 2008

Name

HPL_bcast Perform the row broadcast.

Synopsis

#include "hpl.h"

int HPL_bcast( HPL_T_panel * PANEL, int * IFLAG );

Description

HPL_bcast broadcasts the current panel. Successful completion is indicated by IFLAG set to HPL_SUCCESS on return. IFLAG will be set to HPL_FAILURE on failure and to HPL_KEEP_TESTING when the operation was not completed, in which case this function should be called again.

Arguments

PANEL   (input/output)                HPL_T_panel *
        On entry,  PANEL  points to the  current panel data structure
        being broadcast.
IFLAG   (output)                      int *
        On exit,  IFLAG  indicates  whether  or not the broadcast has
        occurred.

See Also

HPL_binit, HPL_bwait. hpcc-1.4.1/hpl/www/HPL_binit.html0000644000000000000000000000153411256503657013443 00000000000000 HPL_binit HPL 2.0 Library Functions September 10, 2008

Name

HPL_binit Initialize the row broadcast.

Synopsis

#include "hpl.h"

int HPL_binit( HPL_T_panel * PANEL );

Description

HPL_binit initializes a row broadcast. Successful completion is indicated by the returned error code HPL_SUCCESS.

Arguments

PANEL   (input/output)                HPL_T_panel *
        On entry,  PANEL  points to the  current panel data structure
        being broadcast.

See Also

HPL_bcast, HPL_bwait. hpcc-1.4.1/hpl/www/HPL_broadcast.html0000644000000000000000000000342111256503657014275 00000000000000 HPL_broadcast HPL 2.0 Library Functions September 10, 2008

Name

HPL_broadcast Broadcast operation.

Synopsis

#include "hpl.h"

int HPL_broadcast( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const int ROOT, MPI_Comm COMM );

Description

HPL_broadcast broadcasts a message from the process with rank ROOT to all processes in the group.

Arguments

BUFFER  (local input/output)          void *
        On entry,  BUFFER  points to  the  buffer to be broadcast. On
        exit, this array contains the broadcast data and is identical
        on all processes in the group.
COUNT   (global input)                const int
        On entry,  COUNT  indicates the number of entries in  BUFFER.
        COUNT must be at least zero.
DTYPE   (global input)                const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.
ROOT    (global input)                const int
        On entry, ROOT is the coordinate of the source process.
COMM    (global/local input)          MPI_Comm
        The MPI communicator identifying the process collection.

See Also

HPL_reduce, HPL_all_reduce, HPL_barrier, HPL_min, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_bwait.html0000644000000000000000000000161211256503657013441 00000000000000 HPL_bwait HPL 2.0 Library Functions September 10, 2008

Name

HPL_bwait Finalize the row broadcast.

Synopsis

#include "hpl.h"

int HPL_bwait( HPL_T_panel * PANEL );

Description

HPL_bwait waits for the row broadcast of the current panel to terminate. Successful completion is indicated by the returned error code HPL_SUCCESS.

Arguments

PANEL   (input/output)                HPL_T_panel *
        On entry,  PANEL  points to the  current panel data structure
        being broadcast.

See Also

HPL_binit, HPL_bcast. hpcc-1.4.1/hpl/www/HPL_copyL.html0000644000000000000000000000210711256503657013421 00000000000000 HPL_copyL HPL 2.0 Library Functions September 10, 2008

Name

HPL_copyL Copy the current panel into a contiguous workspace.

Synopsis

#include "hpl.h"

void HPL_copyL( HPL_T_panel * PANEL );

Description

HPL_copyL copies the panel of columns, the L1 replicated submatrix, the pivot array and the info scalar into a contiguous workspace for later broadcast. The copy of this panel into a contiguous buffer can be enforced by specifying -DHPL_COPY_L in the architecture specific Makefile.

Arguments

PANEL   (input/output)                HPL_T_panel *
        On entry,  PANEL  points to the  current panel data structure
        being broadcast.

See Also

HPL_binit, HPL_bcast, HPL_bwait. hpcc-1.4.1/hpl/www/HPL_daxpy.html0000644000000000000000000000461411256503657013465 00000000000000 HPL_daxpy HPL 2.0 Library Functions September 10, 2008

Name

HPL_daxpy y := y + alpha * x.

Synopsis

#include "hpl.h"

void HPL_daxpy( const int N, const double ALPHA, const double * X, const int INCX, double * Y, const int INCY );

Description

HPL_daxpy scales the vector x by alpha and adds it to y.

Arguments

N       (local input)                 const int
        On entry, N specifies the length of the vectors  x  and  y. N
        must be at least zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied as zero, then the entries of the incremented array X
        need not be set on input.
X       (local input)                 const double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.
Y       (local input/output)          double *
        On entry,  Y  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
        On exit, the entries of the incremented array  Y  are updated
        with the scaled entries of the incremented array X.
INCY    (local input)                 const int
        On entry, INCY specifies the increment for the elements of Y.
        INCY must not be zero.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* Input vector x and the vector y that is updated in place. */
   double x[3] = { 1.0, 2.0, 3.0 };
   double y[3] = { 4.0, 5.0, 6.0 };

   /* y := y + 2.0 * x over 3 entries, both vectors with unit increment. */
   HPL_daxpy( 3, 2.0, x, 1, y, 1 );

   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
   exit(0);
   return 0;
}

See Also

HPL_dcopy, HPL_dscal, HPL_dswap. hpcc-1.4.1/hpl/www/HPL_dcopy.html0000644000000000000000000000412011256503657013446 00000000000000 HPL_dcopy HPL 2.0 Library Functions September 10, 2008

Name

HPL_dcopy y := x.

Synopsis

#include "hpl.h"

void HPL_dcopy( const int N, const double * X, const int INCX, double * Y, const int INCY );

Description

HPL_dcopy copies the vector x into the vector y.

Arguments

N       (local input)                 const int
        On entry, N specifies the length of the vectors  x  and  y. N
        must be at least zero.
X       (local input)                 const double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.
Y       (local input/output)          double *
        On entry,  Y  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
        On exit, the entries of the incremented array  Y  are updated
        with the entries of the incremented array X.
INCY    (local input)                 const int
        On entry, INCY specifies the increment for the elements of Y.
        INCY must not be zero.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* Source vector x and destination vector y (overwritten by the copy). */
   double x[3] = { 1.0, 2.0, 3.0 };
   double y[3] = { 4.0, 5.0, 6.0 };

   /* y := x over 3 entries, both vectors with unit increment. */
   HPL_dcopy( 3, x, 1, y, 1 );

   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
   exit(0);
   return 0;
}

See Also

HPL_daxpy, HPL_dscal, HPL_dswap. hpcc-1.4.1/hpl/www/HPL_dgemm.html0000644000000000000000000001420111256503657013422 00000000000000 HPL_dgemm HPL 2.0 Library Functions September 10, 2008

Name

HPL_dgemm C := alpha * op(A) * op(B) + beta * C.

Synopsis

#include "hpl.h"

void HPL_dgemm( const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANSA, const enum HPL_TRANS TRANSB, const int M, const int N, const int K, const double ALPHA, const double * A, const int LDA, const double * B, const int LDB, const double BETA, double * C, const int LDC );

Description

HPL_dgemm performs one of the matrix-matrix operations C := alpha * op( A ) * op( B ) + beta * C where op( X ) is one of op( X ) = X or op( X ) = X^T. Alpha and beta are scalars, and A, B and C are matrices, with op(A) an m by k matrix, op(B) a k by n matrix and C an m by n matrix.

Arguments

ORDER   (local input)                 const enum HPL_ORDER
        On entry, ORDER  specifies the storage format of the operands
        as follows:                                                  
           ORDER = HplRowMajor,                                      
           ORDER = HplColumnMajor.                                   
TRANSA  (local input)                 const enum HPL_TRANS
        On entry, TRANSA  specifies the form of  op(A)  to be used in
        the matrix-matrix operation as follows:
           TRANSA==HplNoTrans    : op( A ) = A,                     
           TRANSA==HplTrans      : op( A ) = A^T,                   
           TRANSA==HplConjTrans  : op( A ) = A^T.                   
TRANSB  (local input)                 const enum HPL_TRANS
        On entry, TRANSB  specifies the form of  op(B)  to be used in
        the matrix-matrix operation as follows:
           TRANSB==HplNoTrans    : op( B ) = B,                     
           TRANSB==HplTrans      : op( B ) = B^T,                   
           TRANSB==HplConjTrans  : op( B ) = B^T.                   
M       (local input)                 const int
        On entry,  M  specifies  the  number  of rows  of the  matrix
        op(A)  and  of  the  matrix  C.  M  must  be  at least  zero.
N       (local input)                 const int
        On entry,  N  specifies  the number  of columns of the matrix
        op(B)  and  the number of columns of the matrix  C. N must be
        at least zero.
K       (local input)                 const int
        On entry,  K  specifies  the  number of columns of the matrix
        op(A) and the number of rows of the matrix op(B).  K  must be
        at least  zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied  as  zero  then the elements of the matrices A and B
        need not be set on input.
A       (local input)                 const double *
        On entry,  A  is an array of dimension (LDA,ka),  where ka is
        k  when   TRANSA==HplNoTrans,  and  is  m  otherwise.  Before
        entry  with  TRANSA==HplNoTrans, the  leading  m by k part of
        the array  A must contain the matrix A, otherwise the leading
        k  by  m  part of the array  A  must  contain the  matrix  A.
LDA     (local input)                 const int
        On entry, LDA  specifies the first dimension of A as declared
        in the  calling (sub) program. When  TRANSA==HplNoTrans  then
        LDA must be at least max(1,m), otherwise LDA must be at least
        max(1,k).
B       (local input)                 const double *
        On entry, B is an array of dimension (LDB,kb),  where  kb  is
        n   when  TRANSB==HplNoTrans, and  is  k  otherwise.   Before
        entry with TRANSB==HplNoTrans,  the  leading  k by n  part of
        the array  B must contain the matrix B, otherwise the leading
        n  by  k  part of the array  B  must  contain  the matrix  B.
LDB     (local input)                 const int
        On entry, LDB  specifies the first dimension of B as declared
        in the  calling (sub) program. When  TRANSB==HplNoTrans  then
        LDB must be at least max(1,k), otherwise LDB must be at least
        max(1,n).
BETA    (local input)                 const double
        On entry,  BETA  specifies the scalar  beta.   When  BETA  is
        supplied  as  zero  then  the  elements of the matrix C  need
        not be set on input.
C       (local input/output)          double *
        On entry,  C  is an array of dimension (LDC,n). Before entry,
        the  leading m by n part  of  the  array  C  must contain the
        matrix C,  except when beta is zero, in which case C need not
        be set on entry. On exit, the array  C  is overwritten by the
        m by n  matrix ( alpha*op( A )*op( B ) + beta*C ).
LDC     (local input)                 const int
        On entry, LDC  specifies the first dimension of C as declared
        in  the   calling  (sub)  program.   LDC  must  be  at  least
        max(1,m).

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* 2-by-2 operands, listed in the order the elements are stored. */
   double a[2*2] = { 1.0, 2.0, 3.0, 3.0 };
   double b[2*2] = { 2.0, 1.0, 1.0, 2.0 };
   double c[2*2] = { 4.0, 3.0, 2.0, 1.0 };

   /* c := 2.0 * a * b - 1.0 * c, column-major storage, no transposition. */
   HPL_dgemm( HplColumnMajor, HplNoTrans, HplNoTrans,
              2, 2, 2, 2.0, a, 2, b, 2, -1.0, c, 2 );

   /* Print c row by row: with column-major storage row 0 is c[0], c[2]. */
   printf("  [%f,%f]\n", c[0], c[2]);
   printf("c=[%f,%f]\n", c[1], c[3]);
   exit(0);
   return 0;
}

See Also

HPL_dtrsm. hpcc-1.4.1/hpl/www/HPL_dgemv.html0000644000000000000000000001053011256503657013434 00000000000000 HPL_dgemv HPL 2.0 Library Functions September 10, 2008

Name

HPL_dgemv y := beta * y + alpha * op(A) * x.

Synopsis

#include "hpl.h"

void HPL_dgemv( const enum HPL_ORDER ORDER, const enum HPL_TRANS TRANS, const int M, const int N, const double ALPHA, const double * A, const int LDA, const double * X, const int INCX, const double BETA, double * Y, const int INCY );

Description

HPL_dgemv performs one of the matrix-vector operations y := alpha * op( A ) * x + beta * y, where op( X ) is one of op( X ) = X or op( X ) = X^T. Alpha and beta are scalars, x and y are vectors and A is an m by n matrix.

Arguments

ORDER   (local input)                 const enum HPL_ORDER
        On entry, ORDER  specifies the storage format of the operands
        as follows:                                                  
           ORDER = HplRowMajor,                                      
           ORDER = HplColumnMajor.                                   
TRANS   (local input)                 const enum HPL_TRANS
        On entry,  TRANS  specifies the  operation to be performed as
        follows:   
           TRANS = HplNoTrans y := alpha*A  *x + beta*y,
           TRANS = HplTrans   y := alpha*A^T*x + beta*y.
M       (local input)                 const int
        On entry,  M  specifies  the number of rows of  the matrix A.
        M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the number of columns of the matrix A.
        N must be at least zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied as zero then  A and X  need not be set on input.
A       (local input)                 const double *
        On entry,  A  points  to an array of size equal to or greater
        than LDA * n.  Before  entry, the leading m by n part  of the
        array  A  must contain the matrix coefficients.
LDA     (local input)                 const int
        On entry,  LDA  specifies  the  leading  dimension  of  A  as
        declared  in  the  calling  (sub) program.  LDA  must  be  at
        least MAX(1,m).
X       (local input)                 const double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.
BETA    (local input)                 const double
        On entry, BETA  specifies the scalar beta.    When   BETA  is
        supplied as zero then  Y  need not be set on input.
Y       (local input/output)          double *
        On entry,  Y  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
        Before entry with BETA non-zero, the incremented array Y must
        contain the vector  y.  On exit,  Y  is  overwritten  by  the
        updated vector y.
INCY    (local input)                 const int
        On entry, INCY specifies the increment for the elements of Y.
        INCY must not be zero.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* 2-by-2 matrix a, input vector x, and vector y updated in place. */
   double a[2*2], x[2], y[2];
   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
   /* y has only two entries: initialize y[0] and y[1]. The original wrote */
   /* to y[2] and y[3], past the end of the array, and left y[0..1] unset. */
   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
   /* y := 2.0 * a * x - 1.0 * y, column-major storage, no transposition. */
   HPL_dgemv( HplColumnMajor, HplNoTrans, 2, 2, 2.0,
              a, 2, x, 1, -1.0, y, 1 );
   printf("y=[%f,%f]\n", y[0], y[1]);
   exit(0); return(0);
}

See Also

HPL_dger, HPL_dtrsv. hpcc-1.4.1/hpl/www/HPL_dger.html0000644000000000000000000000707711256503657013267 00000000000000 HPL_dger HPL 2.0 Library Functions September 10, 2008

Name

HPL_dger A := alpha * x * y^T + A.

Synopsis

#include "hpl.h"

void HPL_dger( const enum HPL_ORDER ORDER, const int M, const int N, const double ALPHA, const double * X, const int INCX, double * Y, const int INCY, double * A, const int LDA );

Description

HPL_dger performs the rank 1 operation A := alpha * x * y^T + A, where alpha is a scalar, x is an m-element vector, y is an n-element vector and A is an m by n matrix.

Arguments

ORDER   (local input)                 const enum HPL_ORDER
        On entry, ORDER  specifies the storage format of the operands
        as follows:                                                  
           ORDER = HplRowMajor,                                      
           ORDER = HplColumnMajor.                                   
M       (local input)                 const int
        On entry,  M  specifies  the number of rows of  the matrix A.
        M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the number of columns of the matrix A.
        N must be at least zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied as zero then  X and Y  need not be set on input.
X       (local input)                 const double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( m - 1 ) * abs( INCX ) )  that  contains the vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.
Y       (local input)                 double *
        On entry,  Y  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
INCY    (local input)                 const int
        On entry, INCY specifies the increment for the elements of Y.
        INCY must not be zero.
A       (local input/output)          double *
        On entry,  A  points  to an array of size equal to or greater
        than LDA * n.  Before  entry, the leading m by n part  of the
        array  A  must contain the matrix coefficients. On exit, A is
        overwritten by the updated matrix.
LDA     (local input)                 const int
        On entry,  LDA  specifies  the  leading  dimension  of  A  as
        declared  in  the  calling  (sub) program.  LDA  must  be  at
        least MAX(1,m).

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* 2-by-2 matrix a (updated in place) and the input vectors x and y. */
   double a[2*2], x[2], y[2];
   a[0] = 1.0; a[1] = 2.0; a[2] = 3.0; a[3] = 3.0;
   /* y has only two entries: initialize y[0] and y[1]. The original wrote */
   /* to y[2] and y[3], past the end of the array, and left y[0..1] unset. */
   x[0] = 2.0; x[1] = 1.0; y[0] = 1.0; y[1] = 2.0;
   /* a := 2.0 * x * y^T + a, column-major storage. */
   HPL_dger( HplColumnMajor, 2, 2, 2.0, x, 1, y, 1,
             a, 2 );
   /* Print the updated matrix a (the routine's output) row by row; the   */
   /* original printed the input vector y, which HPL_dger does not change. */
   printf("  [%f,%f]\n", a[0], a[2]);
   printf("a=[%f,%f]\n", a[1], a[3]);
   exit(0); return(0);
}

See Also

HPL_dgemv, HPL_dtrsv. hpcc-1.4.1/hpl/www/HPL_dlacpy.html0000644000000000000000000000400711256503657013610 00000000000000 HPL_dlacpy HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlacpy B := A.

Synopsis

#include "hpl.h"

void HPL_dlacpy( const int M, const int N, const double * A, const int LDA, double * B, const int LDB );

Description

HPL_dlacpy copies an array A into an array B.

Arguments

M       (local input)                 const int
        On entry,  M specifies the number of rows of the arrays A and
        B. M must be at least zero.
N       (local input)                 const int
        On entry,  N specifies  the number of columns of the arrays A
        and B. N must be at least zero.
A       (local input)                 const double *
        On entry, A points to an array of dimension (LDA,N).
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
B       (local output)                double *
        On entry, B points to an array of dimension (LDB,N). On exit,
        B is overwritten with A.
LDB     (local input)                 const int
        On entry, LDB specifies the leading dimension of the array B.
        LDB must be at least MAX(1,M).

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* Source matrix a; b receives the copy and needs no initial value. */
   double a[2*2] = { 1.0, 3.0, 2.0, 4.0 };
   double b[2*2];

   /* b := a for a 2-by-2 matrix, both with leading dimension 2. */
   HPL_dlacpy( 2, 2, a, 2, b, 2 );

   printf("  [%f,%f]\n", b[0], b[2]);
   printf("b=[%f,%f]\n", b[1], b[3]);
   exit(0);
   return 0;
}

See Also

HPL_dlatcpy. hpcc-1.4.1/hpl/www/HPL_dlamch.html0000644000000000000000000000662311256503657013572 00000000000000 HPL_dlamch HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlamch determines machine-specific arithmetic constants.

Synopsis

#include "hpl.h"

double HPL_dlamch( const HPL_T_MACH CMACH );

Description

HPL_dlamch determines machine-specific arithmetic constants such as the relative machine precision (eps), the safe minimum (sfmin) such that 1 / sfmin does not overflow, the base of the machine (base), the precision (prec), the number of (base) digits in the mantissa (t), whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the minimum exponent before (gradual) underflow (emin), the underflow threshold (rmin) base**(emin-1), the largest exponent before overflow (emax), the overflow threshold (rmax) (base**emax)*(1-eps).

Arguments

CMACH   (local input)                 const HPL_T_MACH
        Specifies the value to be returned by HPL_dlamch             
           = HPL_MACH_EPS,   HPL_dlamch := eps (default)             
           = HPL_MACH_SFMIN, HPL_dlamch := sfmin                     
           = HPL_MACH_BASE,  HPL_dlamch := base                      
           = HPL_MACH_PREC,  HPL_dlamch := eps*base                  
           = HPL_MACH_MLEN,  HPL_dlamch := t                         
           = HPL_MACH_RND,   HPL_dlamch := rnd                       
           = HPL_MACH_EMIN,  HPL_dlamch := emin                      
           = HPL_MACH_RMIN,  HPL_dlamch := rmin                      
           = HPL_MACH_EMAX,  HPL_dlamch := emax                      
           = HPL_MACH_RMAX,  HPL_dlamch := rmax                      
         
        where                                                        
         
           eps   = relative machine precision,                       
           sfmin = safe minimum,                                     
           base  = base of the machine,                              
           prec  = eps*base,                                         
           t     = number of digits in the mantissa,                 
           rnd   = 1.0 if rounding occurs in addition,               
           emin  = minimum exponent before underflow,                
           rmin  = underflow threshold,                              
           emax  = largest exponent before overflow,                 
           rmax  = overflow threshold.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* Query the relative machine precision. */
   double eps = HPL_dlamch( HPL_MACH_EPS );

   printf("eps=%18.8e\n", eps);
   exit(0);
   return 0;
}

References

This function has been manually translated from the Fortran 77 LAPACK auxiliary function dlamch.f (version 2.0 -- 1992), that was itself based on the function ENVRON by Malcolm and incorporated suggestions by Gentleman and Marovich. See Malcolm M. A., Algorithms to reveal properties of floating-point arithmetic., Comms. of the ACM, 15, 949-951 (1972). Gentleman W. M. and Marovich S. B., More on algorithms that reveal properties of floating point arithmetic units., Comms. of the ACM, 17, 276-277 (1974). hpcc-1.4.1/hpl/www/HPL_dlange.html0000644000000000000000000000453611256503657013575 00000000000000 HPL_dlange HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlange Compute ||A||.

Synopsis

#include "hpl.h"

double HPL_dlange( const HPL_T_NORM NORM, const int M, const int N, const double * A, const int LDA );

Description

HPL_dlange returns the value of the one norm, or the infinity norm, or the element of largest absolute value of a matrix A: max(abs(A(i,j))) when NORM = HPL_NORM_A, norm1(A), when NORM = HPL_NORM_1, normI(A), when NORM = HPL_NORM_I, where norm1 denotes the one norm of a matrix (maximum column sum) and normI denotes the infinity norm of a matrix (maximum row sum). Note that max(abs(A(i,j))) is not a matrix norm.

Arguments

NORM    (local input)                 const HPL_T_NORM
        On entry,  NORM  specifies  the  value to be returned by this
        function as described above.
M       (local input)                 const int
        On entry,  M  specifies  the number  of rows of the matrix A.
        M must be at least zero.
N       (local input)                 const int
        On entry,  N specifies the number of columns of the matrix A.
        N must be at least zero.
A       (local input)                 const double *
        On entry,  A  points to an  array of dimension  (LDA,N), that
        contains the matrix A.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,M).

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* norm was used without a declaration in the original example. */
   double a[2*2], norm;
   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
   /* Infinity norm of the 2-by-2 matrix a with leading dimension 2. */
   norm = HPL_dlange( HPL_NORM_I, 2, 2, a, 2 );
   printf("norm=%f\n", norm);
   exit(0); return(0);
}

See Also

HPL_dlaprnt, HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_dlaprnt.html0000644000000000000000000000403611256503657014002 00000000000000 HPL_dlaprnt HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaprnt Print the matrix A.

Synopsis

#include "hpl.h"

void HPL_dlaprnt( const int M, const int N, double * A, const int IA, const int JA, const int LDA, const char * CMATNM );

Description

HPL_dlaprnt prints to standard error an M-by-N matrix A.

Arguments

M       (local input)                 const int
        On entry,  M  specifies the number of rows of A. M must be at
        least zero.
N       (local input)                 const int
        On entry,  N  specifies the number of columns of A. N must be
        at least zero.
A       (local input)                 double *
        On entry, A  points to an array of dimension (LDA,N).
IA      (local input)                 const int
        On entry, IA specifies the starting row index to be printed.
JA      (local input)                 const int
        On entry,  JA  specifies  the  starting  column index  to be
        printed.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,M).
CMATNM  (local input)                 const char *
        On entry, CMATNM is the name of the matrix to be printed.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   /* 2-by-2 matrix to print, listed in the order it is stored. */
   double a[2*2] = { 1.0, 3.0, 2.0, 4.0 };

   /* Print a under the name "A", starting at row index 0, column index 0. */
   HPL_dlaprnt( 2, 2, a, 0, 0, 2, "A" );

   exit(0);
   return 0;
}

See Also

HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_dlaswp00N.html0000644000000000000000000000457611256503657014117 00000000000000 HPL_dlaswp00N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp00N performs a series of row interchanges.

Synopsis

#include "hpl.h"

void HPL_dlaswp00N( const int M, const int N, double * A, const int LDA, const int * IPIV );

Description

HPL_dlaswp00N performs a series of local row interchanges on a matrix A. One row interchange is initiated for rows 0 through M-1 of A.

Arguments

M       (local input)                 const int
        On entry, M specifies the number of rows of the array A to be
        interchanged. M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies  the number of columns of the array A.
        N must be at least zero.
A       (local input/output)          double *
        On entry, A  points to an array of dimension (LDA,N) to which
        the row interchanges will be  applied.  On exit, the permuted
        matrix.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
IPIV    (local input)                 const int *
        On entry,  IPIV  is  an  array of size  M  that  contains the
        pivoting  information.  For  k  in [0..M),  IPIV[k]=IROFF + l
        implies that local rows k and l are to be interchanged.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp01N.html0000644000000000000000000000725711256503657014117 00000000000000 HPL_dlaswp01N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp01N copies rows of A into itself and into U.

Synopsis

#include "hpl.h"

void HPL_dlaswp01N( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp01N copies scattered rows of A into itself and into an array U. The row offsets in A of the source rows are specified by LINDXA. The destinations of those rows are specified by LINDXAU. A positive value of LINDXAU indicates that the array destination is U, and A otherwise.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of A that should be
        moved within A or copied into U. M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the length of rows of A that should be
        moved within A or copied into U. N must be at least zero.
A       (local input/output)          double *
        On entry, A points to an array of dimension (LDA,N). The rows
        of this array specified by LINDXA should be moved within A or
        copied into U.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input/output)          double *
        On entry, U points to an array of dimension (LDU,N). The rows
        of A specified by  LINDXA  are copied within this array  U at
        the positions indicated by positive values of LINDXAU.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,M).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local  row indexes  of  A  that should be moved within  A  or
        copied into U.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension  M that  contains
        the local  row indexes of  U  where the rows of  A  should be
        copied at. This array also contains the  local row offsets in
        A where some of the rows of A should be moved to.  A positive
        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
        should be copied into U at the position LINDXAU[i]; otherwise
        the row  LINDXA[i]  of  A  should be moved  at  the  position
        -LINDXAU[i] within A.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp01T.html0000644000000000000000000000741111256503657014115 00000000000000 HPL_dlaswp01T HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp01T copies rows of A into itself and into U.

Synopsis

#include "hpl.h"

void HPL_dlaswp01T( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp01T copies scattered rows of A into itself and into an array U. The row offsets in A of the source rows are specified by LINDXA. The destinations of those rows are specified by LINDXAU. A positive value of LINDXAU indicates that the array destination is U, and A otherwise. Rows of A are stored as columns in U.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of A that should be
        moved within A or copied into U. M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the length of rows of A that should be
        moved within A or copied into U. N must be at least zero.
A       (local input/output)          double *
        On entry, A points to an array of dimension (LDA,N). The rows
        of this array specified by LINDXA should be moved within A or
        copied into U.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input/output)          double *
        On entry, U points to an array of dimension (LDU,M). The rows
        of A specified by  LINDXA  are copied within this array  U at
        the  positions indicated by positive values of LINDXAU.  The
        rows of A are stored as columns in U.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,N).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local  row indexes  of  A  that should be moved within  A  or
        copied into U.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension  M that  contains
        the local  row indexes of  U  where the rows of  A  should be
        copied at. This array also contains the  local row offsets in
        A where some of the rows of A should be moved to.  A positive
        value of  LINDXAU[i]  indicates that the row  LINDXA[i]  of A
        should be copied into U at the position LINDXAU[i]; otherwise
        the row  LINDXA[i]  of  A  should be moved  at  the  position
        -LINDXAU[i] within A.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp02N.html0000644000000000000000000000643211256503657014112 00000000000000 HPL_dlaswp02N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp02N packs rows of A into columns of W.

Synopsis

#include "hpl.h"

void HPL_dlaswp02N( const int M, const int N, const double * A, const int LDA, double * W0, double * W, const int LDW, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp02N packs scattered rows of an array A into workspace W. The row offsets in A are specified by LINDXA.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of A that should be
        copied into W. M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the length of rows of A that should be
        copied into W. N must be at least zero.
A       (local input)                 const double *
        On entry, A points to an array of dimension (LDA,N). The rows
        of this array specified by LINDXA should be copied into W.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
W0      (local input/output)          double *
        On exit,  W0  is  an array of size (M-1)*LDW+1, that contains
        the destination offset  in U where the columns of W should be
        copied.
W       (local output)                double *
        On entry, W  is an array of size (LDW,M). On exit, W contains
        the  rows LINDXA[i] for i in [0..M) of A stored  contiguously
        in W(:,i).
LDW     (local input)                 const int
        On entry, LDW specifies the leading dimension of the array W.
        LDW must be at least MAX(1,N+1).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local row indexes of A that should be copied into W.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension M  that  contains
        the local  row indexes of  U that should be copied into A and
        replaced by the rows of W.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp03N.html0000644000000000000000000000566011256503657014115 00000000000000 HPL_dlaswp03N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp03N copies columns of W into rows of U.

Synopsis

#include "hpl.h"

void HPL_dlaswp03N( const int M, const int N, double * U, const int LDU, const double * W0, const double * W, const int LDW );

Description

HPL_dlaswp03N copies columns of W into rows of an array U. The destination in U of these columns contained in W is stored within W0.

Arguments

M       (local input)                 const int
        On entry, M  specifies  the  number  of columns of  W  stored
        contiguously that should be copied into U. M must be at least
        zero.
N       (local input)                 const int
        On entry,  N  specifies  the  length of columns of  W  stored
        contiguously that should be copied into U. N must be at least
        zero.
U       (local input/output)          double *
        On entry, U points to an array of dimension (LDU,N).  Columns
        of W are copied as rows within this array U at  the positions
        specified in W0.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,M).
W0      (local input)                 const double *
        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
        the destination offset  in U where the columns of W should be
        copied.
W       (local input)                 const double *
        On entry, W  is an array of size (LDW,M),  that contains data
        to be copied into U. For i in [0..M),  entries W(:,i)  should
        be copied into the row or column W0(i*LDW) of U.
LDW     (local input)                 const int
        On entry, LDW specifies the leading dimension of the array W.
        LDW must be at least MAX(1,N+1).

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp03T.html0000644000000000000000000000563511256503657014125 00000000000000 HPL_dlaswp03T HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp03T copies columns of W into U.

Synopsis

#include "hpl.h"

void HPL_dlaswp03T( const int M, const int N, double * U, const int LDU, const double * W0, const double * W, const int LDW );

Description

HPL_dlaswp03T copies columns of W into an array U. The destination in U of these columns contained in W is stored within W0.

Arguments

M       (local input)                 const int
        On entry, M  specifies  the  number  of columns of  W  stored
        contiguously that should be copied into U. M must be at least
        zero.
N       (local input)                 const int
        On entry,  N  specifies  the  length of columns of  W  stored
        contiguously that should be copied into U. N must be at least
        zero.
U       (local input/output)          double *
        On entry, U points to an array of dimension (LDU,M).  Columns
        of W are copied within the array U at the positions specified
        in W0.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,N).
W0      (local input)                 const double *
        On entry,  W0  is an array of size (M-1)*LDW+1, that contains
        the destination offset  in U where the columns of W should be
        copied.
W       (local input)                 const double *
        On entry, W  is an array of size (LDW,M),  that contains data
        to be copied into U. For i in [0..M),  entries W(:,i)  should
        be copied into the row or column W0(i*LDW) of U.
LDW     (local input)                 const int
        On entry, LDW specifies the leading dimension of the array W.
        LDW must be at least MAX(1,N+1).

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp04N.html0000644000000000000000000001020111256503657014101 00000000000000 HPL_dlaswp04N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp04N copies rows of U into A and replaces them with columns of W.

Synopsis

#include "hpl.h"

void HPL_dlaswp04N( const int M0, const int M1, const int N, double * U, const int LDU, double * A, const int LDA, const double * W0, const double * W, const int LDW, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp04N copies M0 rows of U into A and replaces those rows of U with columns of W. In addition M1 - M0 columns of W are copied into rows of U.

Arguments

M0      (local input)                 const int
        On entry, M0 specifies the number of rows of U that should be
        copied into  A  and replaced by columns of  W.  M0 must be at
        least zero.
M1      (local input)                 const int
        On entry, M1 specifies the number of columns of W that should
        be copied into rows of U. M1 must be at least zero.
N       (local input)                 const int
        On entry, N specifies the length of the rows of U that should
        be copied into A. N must be at least zero.
U       (local input/output)          double *
        On entry,  U  points to  an array of dimension (LDU,N).  This
        array contains the rows that are to be copied into A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,M1).
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        rows of U indicated by LINDXAU.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M0).
W0      (local input)                 const double *
        On entry, W0 is an array of size (M0+M1-1)*LDW+1, that contains
        the destination offset  in U where the columns of W should be
        copied.
W       (local input)                 const double *
        On entry, W  is an array of size (LDW,M0+M1),  that  contains
        data to be copied into U.  For i in [M0..M0+M1),  the entries
        W(:,i) are copied into the row W0(i*LDW) of U.
LDW     (local input)                 const int
        On entry, LDW specifies the leading dimension of the array W.
        LDW must be at least MAX(1,N+1).
LINDXA  (local input)                 const int *
        On entry, LINDXA  is an array of dimension  M0 containing the
        local row indexes of A into which rows of U are copied.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension M0 that  contains
        the local  row indexes of  U that should be copied into A and
        replaced by the columns of W.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp04T.html0000644000000000000000000001026511256503657014121 00000000000000 HPL_dlaswp04T HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp04T copies columns of U into rows of A and replaces them with columns of W.

Synopsis

#include "hpl.h"

void HPL_dlaswp04T( const int M0, const int M1, const int N, double * U, const int LDU, double * A, const int LDA, const double * W0, const double * W, const int LDW, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp04T copies M0 columns of U into rows of A and replaces those columns of U with columns of W. In addition M1 - M0 columns of W are copied into U.

Arguments

M0      (local input)                 const int
        On entry, M0 specifies the number of columns of U that should
        be copied into A and replaced by columns of W.  M0 must be at
        least zero.
M1      (local input)                 const int
        On entry, M1 specifies  the number of columns of W that will
        be copied into U. M1 must be at least zero.
N       (local input)                 const int
        On entry,  N  specifies the length of the columns of  U  that
        will be copied into rows of A. N must be at least zero.
U       (local input/output)          double *
        On entry,  U  points  to an array of dimension (LDU,*).  This
        array contains the columns that are to be copied into rows of
        A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,N).
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        columns of U indicated by LINDXAU.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M0).
W0      (local input)                 const double *
        On entry, W0 is an array of size (M0+M1-1)*LDW+1, that contains
        the destination offset  in U where the columns of W should be
        copied.
W       (local input)                 const double *
        On entry, W  is an array of size (LDW,M0+M1),  that  contains
        data to be copied into U.  For i in [M0..M0+M1),  the entries
        W(:,i) are copied into the column W0(i*LDW) of U.
LDW     (local input)                 const int
        On entry, LDW specifies the leading dimension of the array W.
        LDW must be at least MAX(1,N+1).
LINDXA  (local input)                 const int *
        On entry, LINDXA  is an array of dimension  M0 containing the
        local row indexes of A into which columns of U are copied.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension M0 that  contains
        the  local column indexes of  U  that should be copied into A
        and replaced by the columns of W.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp05N.html0000644000000000000000000000575511256503657014124 00000000000000 HPL_dlaswp05N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp05N copies rows of U into A.

Synopsis

#include "hpl.h"

void HPL_dlaswp05N( const int M, const int N, double * A, const int LDA, const double * U, const int LDU, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp05N copies rows of U of global offset LINDXAU into rows of A at positions indicated by LINDXA.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of U that should be
        copied into A. M must be at least zero.
N       (local input)                 const int
        On entry, N specifies the length of the rows of U that should
        be copied into A. N must be at least zero.
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        rows of U indicated by LINDXAU.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input)                 const double *
        On entry,  U  points to an array of dimension  (LDU,N).  This
        array contains the rows that are to be copied into A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,M).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local row indexes of A that should be copied from U.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension  M that  contains
        the local row indexes of U that should be copied in A.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp05T.html0000644000000000000000000000601511256503657014120 00000000000000 HPL_dlaswp05T HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp05T copies columns of U into rows of A.

Synopsis

#include "hpl.h"

void HPL_dlaswp05T( const int M, const int N, double * A, const int LDA, const double * U, const int LDU, const int * LINDXA, const int * LINDXAU );

Description

HPL_dlaswp05T copies columns of U of global offset LINDXAU into rows of A at positions indicated by LINDXA.

Arguments

M       (local input)                 const int
        On entry,  M  specifies the number of columns of U that should
        be copied into A. M must be at least zero.
N       (local input)                 const int
        On entry, N specifies the length of the columns of U that will
        be copied into rows of A. N must be at least zero.
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        columns of U indicated by LINDXAU.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input)                 const double *
        On entry,  U  points  to an array of dimension (LDU,*).  This
        array contains the columns that are to be copied into rows of
        A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,N).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local row indexes of A that should be copied from U.
LINDXAU (local input)                 const int *
        On entry, LINDXAU  is an array of dimension  M that  contains
        the local column indexes of U that should be copied in A.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp06N.html0000644000000000000000000000537411256503657014122 00000000000000 HPL_dlaswp06N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp06N swaps rows of U with rows of A.

Synopsis

#include "hpl.h"

void HPL_dlaswp06N( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA );

Description

HPL_dlaswp06N swaps rows of U with rows of A at positions indicated by LINDXA.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of A that should be
        swapped with rows of U. M must be at least zero.
N       (local input)                 const int
        On entry, N specifies the length of the rows of A that should
        be swapped with rows of U. N must be at least zero.
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        rows or columns of U.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input/output)          double *
        On entry,  U  points  to an array of dimension (LDU,N).  This
        array contains the rows of U that are to be swapped with rows
        of A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,M).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local row indexes of A that should be swapped with U.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp06T.html0000644000000000000000000000542411256503657014124 00000000000000 HPL_dlaswp06T HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp06T swaps columns of U with rows of A.

Synopsis

#include "hpl.h"

void HPL_dlaswp06T( const int M, const int N, double * A, const int LDA, double * U, const int LDU, const int * LINDXA );

Description

HPL_dlaswp06T swaps columns of U with rows of A at positions indicated by LINDXA.

Arguments

M       (local input)                 const int
        On entry, M  specifies the number of rows of A that should be
        swapped with columns of U. M must be at least zero.
N       (local input)                 const int
        On entry, N specifies the length of the rows of A that should
        be swapped with columns of U. N must be at least zero.
A       (local output)                double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        the  rows of this array specified by  LINDXA  are replaced by
        columns of U.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
U       (local input/output)          double *
        On entry,  U  points  to an array of dimension (LDU,*).  This
        array contains the columns of  U  that are to be swapped with
        rows of A.
LDU     (local input)                 const int
        On entry, LDU specifies the leading dimension of the array U.
        LDU must be at least MAX(1,N).
LINDXA  (local input)                 const int *
        On entry, LINDXA is an array of dimension M that contains the
        local row indexes of A that should be swapped with U.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlaswp10N.html0000644000000000000000000000435511256503657014113 00000000000000 HPL_dlaswp10N HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlaswp10N performs a series of column interchanges.

Synopsis

#include "hpl.h"

void HPL_dlaswp10N( const int M, const int N, double * A, const int LDA, const int * IPIV );

Description

HPL_dlaswp10N performs a sequence of local column interchanges on a matrix A. One column interchange is initiated for columns 0 through N-1 of A.

Arguments

M       (local input)                 const int
        On entry,  M  specifies  the number of rows of the array A. M
        must be at least zero.
N       (local input)                 const int
        On entry, N specifies the number of columns of the array A. N
        must be at least zero.
A       (local input/output)          double *
        On entry, A  points to an  array of  dimension (LDA,N).  This
        array contains the columns onto which the interchanges should
        be applied. On exit, A contains the permuted matrix.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,M).
IPIV    (local input)                 const int *
        On entry,  IPIV  is  an  array of size  N  that  contains the
        pivoting information.  For  k  in  [0..N),  IPIV[k]=l implies
        that columns k and l are to be interchanged.

See Also

HPL_dlaswp00N, HPL_dlaswp10N, HPL_dlaswp01N, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp03T, HPL_dlaswp04N, HPL_dlaswp04T, HPL_dlaswp05N, HPL_dlaswp05T, HPL_dlaswp06N, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_dlatcpy.html0000644000000000000000000000412711256503657013777 00000000000000 HPL_dlatcpy HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlatcpy B := A^T

Synopsis

#include "hpl.h"

void HPL_dlatcpy( const int M, const int N, const double * A, const int LDA, double * B, const int LDB );

Description

HPL_dlatcpy copies the transpose of an array A into an array B.

Arguments

M       (local input)                 const int
        On entry,  M specifies the number of  rows of the array B and
        the number of columns of A. M must be at least zero.
N       (local input)                 const int
        On entry,  N specifies the number of  rows of the array A and
        the number of columns of B. N must be at least zero.
A       (local input)                 const double *
        On entry, A points to an array of dimension (LDA,M).
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least MAX(1,N).
B       (local output)                double *
        On entry, B points to an array of dimension (LDB,N). On exit,
        B is overwritten with the transpose of A.
LDB     (local input)                 const int
        On entry, LDB specifies the leading dimension of the array B.
        LDB must be at least MAX(1,M).

Example

#include "hpl.h"

/* Example: copy the transpose of a 2-by-2 matrix a into b and print b. */
int main(int argc, char *argv[])
{
   double a[2*2], b[2*2];
   /* With a leading dimension of 2, a[0],a[1] form the first column and
      a[2],a[3] the second one, i.e. a = [ 1 2 ; 3 4 ]. */
   a[0] = 1.0; a[1] = 3.0; a[2] = 2.0; a[3] = 4.0;
   /* This page documents HPL_dlatcpy (B := A^T); the example must call it
      rather than the plain copy routine HPL_dlacpy. */
   HPL_dlatcpy( 2, 2, a, 2, b, 2 );
   /* Print b one row per line: (b[0], b[2]) then (b[1], b[3]). */
   printf("  [%f,%f]\n", b[0], b[2]);
   printf("b=[%f,%f]\n", b[1], b[3]);
   exit(0); return(0);
}

See Also

HPL_dlacpy. hpcc-1.4.1/hpl/www/HPL_dlocmax.html0000644000000000000000000000563111256503657013767 00000000000000 HPL_dlocmax HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlocmax finds the maximum entry in matrix column.

Synopsis

#include "hpl.h"

void HPL_dlocmax( HPL_T_panel * PANEL, const int N, const int II, const int JJ, double * WORK );

Description

HPL_dlocmax finds the maximum entry in the current column and packs the useful information in WORK[0:3]. On exit, WORK[0] contains the local maximum absolute value scalar, WORK[1] is the corresponding local row index, WORK[2] is the corresponding global row index, and WORK[3] is the coordinate of the process owning this max. When N is less than 1, the WORK[0:2] is initialized to zero, and WORK[3] is set to the total number of process rows.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
N       (local input)                 const int
        On entry,  N specifies the local number of rows of the column
        of A on which we operate.
II      (local input)                 const int
        On entry, II  specifies the row offset where the column to be
        operated on starts with respect to the panel.
JJ      (local input)                 const int
        On entry, JJ  specifies the column offset where the column to
        be operated on starts with respect to the panel.
WORK    (local workspace)             double *
        On entry, WORK  is  a workarray of size at least 4.  On exit,
        WORK[0] contains  the  local  maximum  absolute value scalar,
        WORK[1] contains  the corresponding local row index,  WORK[2]
        contains the corresponding global row index, and  WORK[3]  is
        the coordinate of process owning this max.

See Also

HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_dlocswpN.html0000644000000000000000000000520111256503657014122 00000000000000 HPL_dlocswpN HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlocswpN locally swaps rows within panel.

Synopsis

#include "hpl.h"

void HPL_dlocswpN( HPL_T_panel * PANEL, const int II, const int JJ, double * WORK );

Description

HPL_dlocswpN performs the local swapping operations within a panel. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself).

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
II      (local input)                 const int
        On entry, II  specifies the row offset where the column to be
        operated on starts with respect to the panel.
JJ      (local input)                 const int
        On entry, JJ  specifies the column offset where the column to
        be operated on starts with respect to the panel.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
        WORK[0] contains  the  local  maximum  absolute value scalar,
        WORK[1] contains  the corresponding local row index,  WORK[2]
        contains the corresponding global row index, and  WORK[3]  is
        the coordinate of process owning this max.  The N0 length max
        row is stored in WORK[4:4+N0-1];  Note  that this is also the
        JJth row  (or column) of L1. The remaining part of this array
        is used as workspace.

See Also

HPL_dlocmax, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_dlocswpT.html0000644000000000000000000000512511256503657014135 00000000000000 HPL_dlocswpT HPL 2.0 Library Functions September 10, 2008

Name

HPL_dlocswpT locally swaps rows within panel.

Synopsis

#include "hpl.h"

void HPL_dlocswpT( HPL_T_panel * PANEL, const int II, const int JJ, double * WORK );

Description

HPL_dlocswpT performs the local swapping operations within a panel. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
II      (local input)                 const int
        On entry, II  specifies the row offset where the column to be
        operated on starts with respect to the panel.
JJ      (local input)                 const int
        On entry, JJ  specifies the column offset where the column to
        be operated on starts with respect to the panel.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
        WORK[0] contains  the  local  maximum  absolute value scalar,
        WORK[1] contains  the corresponding local row index,  WORK[2]
        contains the corresponding global row index, and  WORK[3]  is
        the coordinate of process owning this max.  The N0 length max
        row is stored in WORK[4:4+N0-1];  Note  that this is also the
        JJth row  (or column) of L1. The remaining part of this array
        is used as workspace.

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_dmatgen.html0000644000000000000000000000375611256503657013765 00000000000000 HPL_dmatgen HPL 2.0 Library Functions September 10, 2008

Name

HPL_dmatgen random matrix generator.

Synopsis

#include "hpl.h"

void HPL_dmatgen( const int M, const int N, double * A, const int LDA, const int ISEED );

Description

HPL_dmatgen generates (or regenerates) a random matrix A. The pseudo-random generator uses the linear congruential algorithm: X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer Programming, Knuth 1973, Vol. 2.

Arguments

M       (input)                       const int
        On entry,  M  specifies  the number  of rows of the matrix A.
        M must be at least zero.
N       (input)                       const int
        On entry,  N specifies the number of columns of the matrix A.
        N must be at least zero.
A       (output)                      double *
        On entry, A points to an array of dimension (LDA,N). On exit,
        this  array  contains   the   coefficients  of  the  randomly
        generated matrix.
LDA     (input)                       const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,M).
ISEED   (input)                       const int
        On entry, ISEED  specifies  the  seed  number to generate the
        matrix A. ISEED must be at least zero.

See Also

HPL_ladd, HPL_lmul, HPL_setran, HPL_xjumpm, HPL_jumpit, HPL_rand. hpcc-1.4.1/hpl/www/HPL_dscal.html0000644000000000000000000000352611256503657013427 00000000000000 HPL_dscal HPL 2.0 Library Functions September 10, 2008

Name

HPL_dscal x = alpha * x.

Synopsis

#include "hpl.h"

void HPL_dscal( const int N, const double ALPHA, double * X, const int INCX );

Description

HPL_dscal scales the vector x by alpha.

Arguments

N       (local input)                 const int
        On entry, N specifies the length of the vector x. N  must  be
        at least zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied as zero, then the entries of the incremented array X
        need not be set on input.
X       (local input/output)          double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
        On exit, the entries of the incremented array  X  are  scaled
        by the scalar alpha.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.

Example

#include "hpl.h"

/* Example: scale a 3-element vector in place by alpha = 2.0. */
int main(int argc, char *argv[])
{
   double x[3] = { 1.0, 2.0, 3.0 };

   /* N = 3 elements, ALPHA = 2.0, INCX = 1. */
   HPL_dscal( 3, 2.0, x, 1 );
   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
   exit(0);
   return(0);
}

See Also

HPL_daxpy, HPL_dcopy, HPL_dswap. hpcc-1.4.1/hpl/www/HPL_dswap.html0000644000000000000000000000434311256503657013455 00000000000000 HPL_dswap HPL 2.0 Library Functions September 10, 2008

Name

HPL_dswap y <-> x.

Synopsis

#include "hpl.h"

void HPL_dswap( const int N, double * X, const int INCX, double * Y, const int INCY );

Description

HPL_dswap swaps the vectors x and y.

Arguments

N       (local input)                 const int
        On entry, N specifies the length of the vectors  x  and  y. N
        must be at least zero.
X       (local input/output)          double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
        On exit, the entries of the incremented array  X  are updated
        with the entries of the incremented array Y.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.
Y       (local input/output)          double *
        On entry,  Y  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCY ) )  that  contains the vector y.
        On exit, the entries of the incremented array  Y  are updated
        with the entries of the incremented array X.
INCY    (local input)                 const int
        On entry, INCY specifies the increment for the elements of Y.
        INCY must not be zero.

Example

#include "hpl.h"

/* Example: exchange the contents of two 3-element vectors. */
int main(int argc, char *argv[])
{
   double x[3] = { 1.0, 2.0, 3.0 };
   double y[3] = { 4.0, 5.0, 6.0 };

   /* N = 3 elements, INCX = INCY = 1. */
   HPL_dswap( 3, x, 1, y, 1 );
   printf("x=[%f,%f,%f]\n", x[0], x[1], x[2]);
   printf("y=[%f,%f,%f]\n", y[0], y[1], y[2]);
   exit(0);
   return(0);
}

See Also

HPL_daxpy, HPL_dcopy, HPL_dscal. hpcc-1.4.1/hpl/www/HPL_dtrsm.html0000644000000000000000000001345611256503657013475 00000000000000 HPL_dtrsm HPL 2.0 Library Functions September 10, 2008

Name

HPL_dtrsm B := A^{-1} * B or B := B * A^{-1}.

Synopsis

#include "hpl.h"

void HPL_dtrsm( const enum HPL_ORDER ORDER, const enum HPL_SIDE SIDE, const enum HPL_UPLO UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int M, const int N, const double ALPHA, const double * A, const int LDA, double * B, const int LDB );

Description

HPL_dtrsm solves one of the matrix equations op( A ) * X = alpha * B, or X * op( A ) = alpha * B, where alpha is a scalar, X and B are m by n matrices, A is a unit, or non-unit, upper or lower triangular matrix and op(A) is one of op( A ) = A or op( A ) = A^T. The matrix X is overwritten on B. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

Arguments

ORDER   (local input)                 const enum HPL_ORDER
        On entry, ORDER  specifies the storage format of the operands
        as follows:                                                  
           ORDER = HplRowMajor,                                      
           ORDER = HplColumnMajor.                                   
SIDE    (local input)                 const enum HPL_SIDE
        On entry, SIDE  specifies  whether  op(A) appears on the left
        or right of X as follows:
           SIDE==HplLeft    op( A ) * X = alpha * B,
           SIDE==HplRight   X * op( A ) = alpha * B.
UPLO    (local input)                 const enum HPL_UPLO
        On  entry,   UPLO   specifies  whether  the  upper  or  lower
        triangular  part  of the array  A  is to be referenced.  When
        UPLO==HplUpper, only  the upper triangular part of A is to be
        referenced, otherwise only the lower triangular part of A is 
        to be referenced. 
TRANS   (local input)                 const enum HPL_TRANS
        On entry, TRANS  specifies the form of  op(A)  to be used  in
        the matrix-matrix operation as follows:
           TRANS==HplNoTrans    : op( A ) = A,
           TRANS==HplTrans      : op( A ) = A^T,
           TRANS==HplConjTrans  : op( A ) = A^T.
DIAG    (local input)                 const enum HPL_DIAG
        On entry,  DIAG  specifies  whether  A  is unit triangular or
        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
        and otherwise, A is not assumed to be unit triangular.
M       (local input)                 const int
        On entry,  M  specifies  the number of rows of the  matrix B.
        M must be at least zero.
N       (local input)                 const int
        On entry, N  specifies the number of columns of the matrix B.
        N must be at least zero.
ALPHA   (local input)                 const double
        On entry, ALPHA specifies the scalar alpha.   When  ALPHA  is
        supplied  as  zero then the elements of the matrix B need not
        be set on input.
A       (local input)                 const double *
        On entry,  A  points  to an array of size equal to or greater
        than LDA * k,  where  k is m  when  SIDE==HplLeft  and  is  n
        otherwise.  Before  entry  with  UPLO==HplUpper,  the leading
        k by k upper triangular  part of the array A must contain the
        upper triangular  matrix and the  strictly  lower  triangular
        part of A is not referenced.  When  UPLO==HplLower on  entry,
        the  leading k by k lower triangular part of the array A must
        contain the lower triangular matrix  and  the  strictly upper
        triangular part of A is not referenced.
         
        Note that  when  DIAG==HplUnit,  the  diagonal elements of  A
        are not referenced either,  but are assumed to be unity.
LDA     (local input)                 const int
        On entry,  LDA  specifies  the  leading  dimension  of  A  as
        declared  in  the  calling  (sub) program.  LDA  must  be  at
        least MAX(1,m) when SIDE==HplLeft, and MAX(1,n) otherwise.
B       (local input/output)          double *
        On entry,  B  points  to an array of size equal to or greater
        than LDB * n.  Before entry, the leading  m by n  part of the
        array B must contain the matrix B, except when alpha is zero,
        in which case B need not be set on entry.  On exit, the array
        B is overwritten by the m by n solution matrix.
LDB     (local input)                 const int
        On entry,  LDB  specifies  the  leading  dimension  of  B  as
        declared  in  the  calling  (sub) program.  LDB  must  be  at
        least MAX(1,m).

Example

#include "hpl.h"

/* Example: solve op( A ) * X = alpha * B in place with SIDE = HplLeft,
 * UPLO = HplUpper, TRANS = HplNoTrans, DIAG = HplNonUnit, alpha = 2.0. */
int main(int argc, char *argv[])
{
   /* Both 2-by-2 operands are passed with ORDER = HplColumnMajor. */
   double a[2*2] = { 4.0, 1.0, 2.0, 5.0 };
   double b[2*2] = { 2.0, 1.0, 1.0, 2.0 };

   /* M = N = 2, LDA = LDB = 2; the solution overwrites b. */
   HPL_dtrsm( HplColumnMajor, HplLeft, HplUpper,
              HplNoTrans, HplNonUnit, 2, 2, 2.0,
              a, 2, b, 2 );
   printf("  [%f,%f]\n", b[0], b[2]);
   printf("b=[%f,%f]\n", b[1], b[3]);
   exit(0);
   return(0);
}

See Also

HPL_dgemm. hpcc-1.4.1/hpl/www/HPL_dtrsv.html0000644000000000000000000001070111256503657013474 00000000000000 HPL_dtrsv HPL 2.0 Library Functions September 10, 2008

Name

HPL_dtrsv x := A^{-1} x.

Synopsis

#include "hpl.h"

void HPL_dtrsv( const enum HPL_ORDER ORDER, const enum HPL_UPLO UPLO, const enum HPL_TRANS TRANS, const enum HPL_DIAG DIAG, const int N, const double * A, const int LDA, double * X, const int INCX );

Description

HPL_dtrsv solves one of the systems of equations A * x = b, or A^T * x = b, where b and x are n-element vectors and A is an n by n non-unit, or unit, upper or lower triangular matrix. No test for singularity or near-singularity is included in this routine. Such tests must be performed before calling this routine.

Arguments

ORDER   (local input)                 const enum HPL_ORDER
        On entry, ORDER  specifies the storage format of the operands
        as follows:                                                  
           ORDER = HplRowMajor,                                      
           ORDER = HplColumnMajor.                                   
UPLO    (local input)                 const enum HPL_UPLO
        On  entry,   UPLO   specifies  whether  the  upper  or  lower
        triangular  part  of the array  A  is to be referenced.  When
        UPLO==HplUpper, only  the upper triangular part of A is to be
        referenced, otherwise only the lower triangular part of A is 
        to be referenced. 
TRANS   (local input)                 const enum HPL_TRANS
        On entry,  TRANS  specifies  the equations  to  be  solved as
        follows:
           TRANS==HplNoTrans     A   * x = b,
           TRANS==HplTrans       A^T * x = b.
DIAG    (local input)                 const enum HPL_DIAG
        On entry,  DIAG  specifies  whether  A  is unit triangular or
        not. When DIAG==HplUnit,  A is assumed to be unit triangular,
        and otherwise, A is not assumed to be unit triangular.
N       (local input)                 const int
        On entry, N specifies the order of the matrix A. N must be at
        least zero.
A       (local input)                 const double *
        On entry,  A  points  to an array of size equal to or greater
        than LDA * n. Before entry with  UPLO==HplUpper,  the leading
        n by n upper triangular  part of the array A must contain the
        upper triangular  matrix and the  strictly  lower  triangular
        part of A is not referenced.  When  UPLO==HplLower  on entry,
        the  leading n by n lower triangular part of the array A must
        contain the lower triangular matrix  and  the  strictly upper
        triangular part of A is not referenced.
         
        Note  that  when  DIAG==HplUnit,  the diagonal elements of  A
        are not referenced either,  but are assumed to be unity.
LDA     (local input)                 const int
        On entry,  LDA  specifies  the  leading  dimension  of  A  as
        declared  in  the  calling  (sub) program.  LDA  must  be  at
        least MAX(1,n).
X       (local input/output)          double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
        Before entry,  the  incremented array  X  must contain  the n
        element right-hand side vector b. On exit,  X  is overwritten
        with the solution vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   double a[2*2], x[2];
   a[0] = 4.0; a[1] = 1.0; a[2] = 2.0; a[3] = 5.0;
   x[0] = 2.0; x[1] = 1.0;
   HPL_dtrsv( HplColumnMajor, HplLower, HplNoTrans,
              HplNoUnit, a, 2, x, 1 );
   printf("x=[%f,%f]\n", x[0], x[1]);
   exit(0); return(0);
}

See Also

HPL_dger, HPL_dgemv. hpcc-1.4.1/hpl/www/HPL_equil.html0000644000000000000000000000702411256503657013455 00000000000000 HPL_equil HPL 2.0 Library Functions September 10, 2008

Name

HPL_equil Equilibrate U and forward the column panel L.

Synopsis

#include "hpl.h"

void HPL_equil( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const enum HPL_TRANS TRANS, const int N, double * U, const int LDU, int * IPLEN, const int * IPMAP, const int * IPMAPM1, int * IWORK );

Description

HPL_equil equilibrates the local pieces of U, so that on exit from this function, pieces of U contained in every process row are of the same size. This phase makes the rolling phase optimal. In addition, this function probes for the column panel L and forwards it when possible.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be equilibrated) information.
TRANS   (global input)                const enum HPL_TRANS
        On entry, TRANS specifies whether  U  is stored in transposed
        or non-transposed form.
N       (local input)                 const int
        On entry, N  specifies the number of rows or columns of  U. N
        must be at least 0.
U       (local input/output)          double *
        On entry,  U  is an array of dimension (LDU,*) containing the
        local pieces of U in each process row.
LDU     (local input)                 const int
        On entry, LDU specifies the local leading dimension of U. LDU
        should be at least MAX(1,IPLEN[nprow]) when  U  is stored  in
        non-transposed form, and MAX(1,N) otherwise.
IPLEN   (global input)                int *
        On entry, IPLEN is an array of dimension NPROW+1.  This array
        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
        in process IPMAP[i].
IPMAP   (global input)                const int *
        On entry, IPMAP is an array of dimension  NPROW.  This  array
        contains  the  logarithmic mapping of the processes. In other
        words, IPMAP[myrow]  is the absolute coordinate of the sorted
        process.
IPMAPM1 (global input)                const int *
        On entry, IPMAPM1  is an array of dimension NPROW. This array
        contains  the inverse of the logarithmic mapping contained in
        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.
IWORK   (workspace)                   int *
        On entry, IWORK is a workarray of dimension NPROW+1.

See Also

HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_fprintf.html0000644000000000000000000000245111256503657014005 00000000000000 HPL_fprintf HPL 2.0 Library Functions September 10, 2008

Name

HPL_fprintf fprintf + fflush wrapper.

Synopsis

#include "hpl.h"

void HPL_fprintf( FILE * STREAM, const char * FORM, ... );

Description

HPL_fprintf is a wrapper around fprintf flushing the output stream.

Arguments

STREAM  (local input)                 FILE *
        On entry, STREAM specifies the output stream.
FORM    (local input)                 const char *
        On entry, FORM specifies the format, i.e., how the subsequent
        arguments are converted for output.
        (local input)                 ...
        On entry,  ...  is the list of arguments to be printed within
        the format string.

Example

#include "hpl.h"

/* Example: print a greeting to stdout through the HPL_fprintf wrapper. */
int main(int argc, char *argv[])
{
   /* STREAM = stdout, FORM = the message itself (no extra arguments). */
   HPL_fprintf( stdout, "Hello World.\n" );

   exit(0);
   return(0);
}

See Also

HPL_abort, HPL_warn. hpcc-1.4.1/hpl/www/HPL_grid_exit.html0000644000000000000000000000174611256503657014321 00000000000000 HPL_grid_exit HPL 2.0 Library Functions September 10, 2008

Name

HPL_grid_exit Exit process grid.

Synopsis

#include "hpl.h"

int HPL_grid_exit( HPL_T_grid * GRID );

Description

HPL_grid_exit marks the process grid object for deallocation. The returned error code MPI_SUCCESS indicates successful completion. Other error codes are (MPI) implementation dependent.

Arguments

GRID    (local input/output)          HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid to be released.

See Also

HPL_pnum, HPL_grid_init, HPL_grid_info. hpcc-1.4.1/hpl/www/HPL_grid_info.html0000644000000000000000000000364711256503657014305 00000000000000 HPL_grid_info HPL 2.0 Library Functions September 10, 2008

Name

HPL_grid_info Retrieve grid information.

Synopsis

#include "hpl.h"

int HPL_grid_info( const HPL_T_grid * GRID, int * NPROW, int * NPCOL, int * MYROW, int * MYCOL );

Description

HPL_grid_info returns the grid shape and the coordinates in the grid of the calling process. Successful completion is indicated by the returned error code MPI_SUCCESS. Other error codes depend on the MPI implementation.

Arguments

GRID    (local input)                 const HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
NPROW   (global output)               int *
        On exit,   NPROW  specifies the number of process rows in the
        grid. NPROW is at least one.
NPCOL   (global output)               int *
        On exit,   NPCOL  specifies  the number of process columns in
        the grid. NPCOL is at least one.
MYROW   (global output)               int *
        On exit,  MYROW  specifies my  row process  coordinate in the
        grid. MYROW is greater than or equal  to zero  and  less than
        NPROW.
MYCOL   (global output)               int *
        On exit,  MYCOL specifies my column process coordinate in the
        grid. MYCOL is greater than or equal  to zero  and  less than
        NPCOL.

See Also

HPL_pnum, HPL_grid_init, HPL_grid_exit. hpcc-1.4.1/hpl/www/HPL_grid_init.html0000644000000000000000000000436511256503657014313 00000000000000 HPL_grid_init HPL 2.0 Library Functions September 10, 2008

Name

HPL_grid_init Create a process grid.

Synopsis

#include "hpl.h"

int HPL_grid_init( MPI_Comm COMM, const HPL_T_ORDER ORDER, const int NPROW, const int NPCOL, HPL_T_grid * GRID );

Description

HPL_grid_init creates a NPROW x NPCOL process grid using column- or row-major ordering from an initial collection of processes identified by an MPI communicator. Successful completion is indicated by the returned error code MPI_SUCCESS. Other error codes depend on the MPI implementation. The coordinates of processes that are not part of the grid are set to values outside of [0..NPROW) x [0..NPCOL).

Arguments

COMM    (global/local input)          MPI_Comm
        On entry,  COMM  is  the  MPI  communicator  identifying  the
        initial  collection  of  processes out of which  the  grid is
        formed.
ORDER   (global input)                const HPL_T_ORDER
        On entry, ORDER specifies how the processes should be ordered
        in the grid as follows:
           ORDER = HPL_ROW_MAJOR    row-major    ordering;
           ORDER = HPL_COLUMN_MAJOR column-major ordering;
NPROW   (global input)                const int
        On entry,  NPROW  specifies the number of process rows in the
        grid to be created. NPROW must be at least one.
NPCOL   (global input)                const int
        On entry,  NPCOL  specifies  the number of process columns in
        the grid to be created. NPCOL must be at least one.
GRID    (local input/output)          HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information to be initialized.

See Also

HPL_pnum, HPL_grid_info, HPL_grid_exit. hpcc-1.4.1/hpl/www/HPL_idamax.html0000644000000000000000000000314111256503657013575 00000000000000 HPL_idamax HPL 2.0 Library Functions September 10, 2008

Name

HPL_idamax 1st k s.t. |x_k| = max_i(|x_i|).

Synopsis

#include "hpl.h"

int HPL_idamax( const int N, const double * X, const int INCX );

Description

HPL_idamax returns the index in an n-vector x of the first element having maximum absolute value.

Arguments

N       (local input)                 const int
        On entry, N specifies the length of the vector x. N  must  be
        at least zero.
X       (local input)                 const double *
        On entry,  X  is an incremented array of dimension  at  least
        ( 1 + ( n - 1 ) * abs( INCX ) )  that  contains the vector x.
INCX    (local input)                 const int
        On entry, INCX specifies the increment for the elements of X.
        INCX must not be zero.

Example

#include "hpl.h"

/* Example: find the index of the entry of largest absolute value. */
int main(int argc, char *argv[])
{
   double x[3] = { 1.0, 3.0, 2.0 };
   /* N = 3 elements, INCX = 1. */
   int    imax = HPL_idamax( 3, x, 1 );

   printf("imax=%d\n", imax);
   exit(0);
   return(0);
}

See Also

HPL_daxpy, HPL_dcopy, HPL_dscal, HPL_dswap. hpcc-1.4.1/hpl/www/HPL_indxg2l.html0000644000000000000000000000403411256503657013703 00000000000000 HPL_indxg2l HPL 2.0 Library Functions September 10, 2008

Name

HPL_indxg2l Map a global index into a local one.

Synopsis

#include "hpl.h"

int HPL_indxg2l( const int IG, const int INB, const int NB, const int SRCPROC, const int NPROCS );

Description

HPL_indxg2l computes the local index of a matrix entry pointed to by the global index IG. This local returned index is the same in all processes.

Arguments

IG      (input)                       const int
        On entry, IG specifies the global index of the matrix  entry.
        IG must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix. NB must be larger than one.
SRCPROC (input)                       const int
        On entry, if SRCPROC = -1, the data  is not  distributed  but
        replicated,  in  which  case  this  routine returns IG in all
        processes. Otherwise, the value of SRCPROC is ignored.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2lp, HPL_indxg2p, HPL_indxl2g, HPL_numroc, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_indxg2lp.html0000644000000000000000000000503611256503657014066 00000000000000 HPL_indxg2lp HPL 2.0 Library Functions September 10, 2008

Name

HPL_indxg2lp Map a global index into a local one and a process coordinate.

Synopsis

#include "hpl.h"

void HPL_indxg2lp( int * IL, int * PROC, const int IG, const int INB, const int NB, const int SRCPROC, const int NPROCS );

Description

HPL_indxg2lp computes the local index of a matrix entry pointed to by the global index IG as well as the process coordinate which possesses this entry. The local returned index is the same in all processes.

Arguments

IL      (output)                      int *
        On exit, IL specifies the local index corresponding to IG. IL
        is at least zero.
PROC    (output)                      int *
        On exit,  PROC  is the  coordinate of the process  owning the
        entry specified by the global index IG. PROC is at least zero
        and less than NPROCS.
IG      (input)                       const int
        On entry, IG specifies the global index of the matrix  entry.
        IG must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
SRCPROC (input)                       const int
        On entry, if SRCPROC = -1, the data  is not  distributed  but
        replicated,  in  which  case  this  routine returns IG in all
        processes. Otherwise, the value of SRCPROC is ignored.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2l, HPL_indxg2p, HPL_indxl2g, HPL_numroc, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_indxg2p.html0000644000000000000000000000377611256503657013723 00000000000000 HPL_indxg2p HPL 2.0 Library Functions September 10, 2008

Name

HPL_indxg2p Map a global index into a process coordinate.

Synopsis

#include "hpl.h"

int HPL_indxg2p( const int IG, const int INB, const int NB, const int SRCPROC, const int NPROCS );

Description

HPL_indxg2p computes the process coordinate which possesses the entry of a matrix specified by a global index IG.

Arguments

IG      (input)                       const int
        On entry, IG specifies the global index of the matrix  entry.
        IG must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
SRCPROC (input)                       const int
        On entry,  SRCPROC  specifies  the coordinate of the  process
        that possesses the first row or column of the matrix. SRCPROC
        must be at least zero and strictly less than NPROCS.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2l, HPL_indxg2lp, HPL_indxl2g, HPL_numroc, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_indxl2g.html0000644000000000000000000000446311256503657013711 00000000000000 HPL_indxl2g HPL 2.0 Library Functions September 10, 2008

Name

HPL_indxl2g Map an index-process pair into a global index.

Synopsis

#include "hpl.h"

int HPL_indxl2g( const int IL, const int INB, const int NB, const int PROC, const int SRCPROC, const int NPROCS );

Description

HPL_indxl2g computes the global index of a matrix entry pointed to by the local index IL of the process indicated by PROC.

Arguments

IL      (input)                       const int
        On entry, IL specifies the local  index of the matrix  entry.
        IL must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
PROC    (input)                       const int
        On entry, PROC  specifies the coordinate of the process whose
        local array row or column is to be determined. PROC  must  be
        at least zero and strictly less than NPROCS.
SRCPROC (input)                       const int
        On entry,  SRCPROC  specifies  the coordinate of the  process
        that possesses the first row or column of the matrix. SRCPROC
        must be at least zero and strictly less than NPROCS.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2l, HPL_indxg2lp, HPL_indxg2p, HPL_numroc, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_infog2l.html0000644000000000000000000001123511256503657013675 00000000000000 HPL_infog2l HPL 2.0 Library Functions September 10, 2008

Name

HPL_infog2l global to local index translation.

Synopsis

#include "hpl.h"

void HPL_infog2l( int I, int J, const int IMB, const int MB, const int INB, const int NB, const int RSRC, const int CSRC, const int MYROW, const int MYCOL, const int NPROW, const int NPCOL, int * II, int * JJ, int * PROW, int * PCOL );

Description

HPL_infog2l computes the starting local index II, JJ corresponding to the submatrix starting globally at the entry pointed to by I, J. This routine returns the coordinates in the grid of the process owning the matrix entry of global indexes I, J, namely PROW and PCOL.

Arguments

I       (global input)                int
        On entry,  I  specifies  the  global  row index of the matrix
        entry. I must be at least zero.
J       (global input)                int
        On entry,  J  specifies the global column index of the matrix
        entry. J must be at least zero.
IMB     (global input)                const int
        On entry,  IMB  specifies  the size of the first row block of
        the global matrix. IMB must be at least one.
MB      (global input)                const int
        On entry,  MB specifies the blocking factor used to partition
        and  distribute the rows of the matrix A.  MB  must be larger
        than one.
INB     (global input)                const int
        On entry, INB specifies the size of the first column block of
        the global matrix. INB must be at least one.
NB      (global input)                const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the columns of the matrix A. NB must be larger
        than one.
RSRC    (global input)                const int
        On entry,  RSRC  specifies  the row coordinate of the process
        that possesses the row  I.  RSRC  must  be at least zero  and
        strictly less than NPROW.
CSRC    (global input)                const int
        On entry, CSRC specifies the column coordinate of the process
        that possesses the column J. CSRC  must be at least zero  and
        strictly less than NPCOL.
MYROW   (local input)                 const int
        On entry, MYROW  specifies my  row process  coordinate in the
        grid. MYROW is greater than or equal  to zero  and  less than
        NPROW.
MYCOL   (local input)                 const int
        On entry, MYCOL specifies my column process coordinate in the
        grid. MYCOL is greater than or equal  to zero  and  less than
        NPCOL.
NPROW   (global input)                const int
        On entry,  NPROW  specifies the number of process rows in the
        grid. NPROW is at least one.
NPCOL   (global input)                const int
        On entry,  NPCOL  specifies  the number of process columns in
        the grid. NPCOL is at least one.
II      (local output)                int *
        On exit, II  specifies the  local  starting  row index of the
        submatrix. On exit, II is at least 0.
JJ      (local output)                int *
        On exit, JJ  specifies the local starting column index of the
        submatrix. On exit, JJ is at least 0.
PROW    (global output)               int *
        On exit, PROW is the row coordinate of the process owning the
        entry specified by the global index I.  PROW is at least zero
        and less than NPROW.
PCOL    (global output)               int *
        On exit, PCOL  is the column coordinate of the process owning
        the entry specified by the global index J.  PCOL  is at least
        zero and less than NPCOL.

See Also

HPL_indxg2l, HPL_indxg2p, HPL_indxl2g, HPL_numroc, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_jumpit.html0000644000000000000000000000355211256503657013650 00000000000000 HPL_jumpit HPL 2.0 Library Functions September 10, 2008

Name

HPL_jumpit jump into the random sequence.

Synopsis

#include "hpl.h"

void HPL_jumpit( int * MULT, int * IADD, int * IRANN, int * IRANM );

Description

HPL_jumpit jumps in the random sequence from the number X(n) encoded in IRANN to the number X(m) encoded in IRANM using the constants A and C encoded in MULT and IADD: X(m) = A * X(n) + C. The constants A and C obviously depend on m and n, see the function HPL_xjumpm in order to initialize them.

Arguments

MULT    (local input)                 int *
        On entry, MULT is an array of dimension 2, that contains the
        16-lower and 15-higher bits of the constant A.
IADD    (local input)                 int *
        On entry, IADD is an array of dimension 2, that contains the
        16-lower and 15-higher bits of the constant C.
IRANN   (local input)                 int *
        On entry,  IRANN  is an array of dimension 2,  that contains 
        the 16-lower and 15-higher bits of the encoding of X(n).
IRANM   (local output)                int *
        On entry,  IRANM  is an array of dimension 2.  On exit, this
        array contains respectively the 16-lower and  15-higher bits
        of the encoding of X(m).

See Also

HPL_ladd, HPL_lmul, HPL_setran, HPL_xjumpm, HPL_rand. hpcc-1.4.1/hpl/www/HPL_ladd.html0000644000000000000000000000301211256503657013233 00000000000000 HPL_ladd HPL 2.0 Library Functions September 10, 2008

Name

HPL_ladd Adds two long positive integers.

Synopsis

#include "hpl.h"

void HPL_ladd( int * J, int * K, int * I );

Description

HPL_ladd adds without carry two long positive integers K and J and puts the result into I. The long integers I, J, K are encoded on 64 bits using an array of 2 integers. The 32-lower bits are stored in the first entry of each array, the 32-higher bits in the second entry.

Arguments

J       (local input)                 int *
        On entry, J is an integer array of dimension 2 containing the
        encoded long integer J.
K       (local input)                 int *
        On entry, K is an integer array of dimension 2 containing the
        encoded long integer K.
I       (local output)                int *
        On entry, I is an integer array of dimension 2. On exit, this
        array contains the encoded long integer result.

See Also

HPL_lmul, HPL_setran, HPL_xjumpm, HPL_jumpit, HPL_rand. hpcc-1.4.1/hpl/www/HPL_lmul.html0000644000000000000000000000313411256503657013305 00000000000000 HPL_lmul HPL 2.0 Library Functions September 10, 2008

Name

HPL_lmul multiplies 2 long positive integers.

Synopsis

#include "hpl.h"

void HPL_lmul( int * K, int * J, int * I );

Description

HPL_lmul multiplies without carry two long positive integers K and J and puts the result into I. The long integers I, J, K are encoded on 64 bits using an array of 2 integers. The 32-lower bits are stored in the first entry of each array, the 32-higher bits in the second entry of each array. For efficiency purposes, the intrinsic modulo function is inlined.

Arguments

K       (local input)                 int *
        On entry, K is an integer array of dimension 2 containing the
        encoded long integer K.
J       (local input)                 int *
        On entry, J is an integer array of dimension 2 containing the
        encoded long integer J.
I       (local output)                int *
        On entry, I is an integer array of dimension 2. On exit, this
        array contains the encoded long integer result.

See Also

HPL_ladd, HPL_setran, HPL_xjumpm, HPL_jumpit, HPL_rand. hpcc-1.4.1/hpl/www/HPL_logsort.html0000644000000000000000000000561211256503657014030 00000000000000 HPL_logsort HPL 2.0 Library Functions September 10, 2008

Name

HPL_logsort Sort the processes in logarithmic order.

Synopsis

#include "hpl.h"

void HPL_logsort( const int NPROCS, const int ICURROC, int * IPLEN, int * IPMAP, int * IPMAPM1 );

Description

HPL_logsort computes an array IPMAP and its inverse IPMAPM1 that contain the logarithmically sorted process ids with respect to the local number of rows of U that they own. This is necessary to ensure that the logarithmic spreading of U is optimal in terms of number of steps and communication volume as well. In other words, the largest pieces of U will be sent a minimal number of times.

Arguments

NPROCS  (global input)                const int
        On entry, NPROCS  specifies the number of process rows in the
        process grid. NPROCS is at least one.
ICURROC (global input)                const int
        On entry, ICURROC is the source process row.
IPLEN   (global input/output)         int *
        On entry, IPLEN is an array of dimension NPROCS+1,  such that
        IPLEN[0] is 0, and IPLEN[i] contains the number of rows of U,
        that process i-1 has.  On exit,  IPLEN[i]  is  the number  of
        rows of U  in the processes before process IPMAP[i] after the
        sort,  with  the convention that  IPLEN[NPROCS] is  the total
        number  of rows  of the panel.  In other words,  IPLEN[i+1] -
        IPLEN[i] is  the  number of rows of A that should be moved to
        the process IPMAP[i].  IPLEN  is such that the number of rows
        of  the  source process  row is IPLEN[1] - IPLEN[0],  and the
        remaining  entries  of  this  array  are  sorted  so that the
        quantities IPLEN[i+1]-IPLEN[i] are logarithmically sorted.
IPMAP   (global output)               int *
        On entry,  IPMAP  is an array of dimension  NPROCS.  On exit,
        this array contains the logarithmic mapping of the processes.  In
        other words, IPMAP[myroc] is the corresponding sorted process
        coordinate.
IPMAPM1 (global output)               int *
        On entry, IPMAPM1  is an array of dimension NPROCS.  On exit,
        this  array  contains  the inverse of the logarithmic mapping
        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
        [0.. NPROCS)

See Also

HPL_plindx1, HPL_plindx10, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_max.html0000644000000000000000000000305211256503657013120 00000000000000 HPL_max HPL 2.0 Library Functions September 10, 2008

Name

HPL_max Combine (max) two buffers.

Synopsis

#include "hpl.h"

void HPL_max( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE );

Description

HPL_max combines (max) two buffers.

Arguments

N       (input)                       const int
        On entry, N  specifies  the  length  of  the  buffers  to  be
        combined. N must be at least zero.
IN      (input)                       const void *
        On entry, IN points to the input-only buffer to be combined.
INOUT   (input/output)                void *
        On entry, INOUT  points  to  the  input-output  buffer  to be
        combined.  On exit,  the  entries of this array  contain  the
        combined results.
DTYPE   (input)                       const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.

See Also

HPL_broadcast, HPL_reduce, HPL_all_reduce, HPL_barrier, HPL_min, HPL_sum. hpcc-1.4.1/hpl/www/HPL_min.html0000644000000000000000000000305211256503657013116 00000000000000 HPL_min HPL 2.0 Library Functions September 10, 2008

Name

HPL_min Combine (min) two buffers.

Synopsis

#include "hpl.h"

void HPL_min( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE );

Description

HPL_min combines (min) two buffers.

Arguments

N       (input)                       const int
        On entry, N  specifies  the  length  of  the  buffers  to  be
        combined. N must be at least zero.
IN      (input)                       const void *
        On entry, IN points to the input-only buffer to be combined.
INOUT   (input/output)                void *
        On entry, INOUT  points  to  the  input-output  buffer  to be
        combined.  On exit,  the  entries of this array  contain  the
        combined results.
DTYPE   (input)                       const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.

See Also

HPL_broadcast, HPL_reduce, HPL_all_reduce, HPL_barrier, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_numroc.html0000644000000000000000000000446511256503657013647 00000000000000 HPL_numroc HPL 2.0 Library Functions September 10, 2008

Name

HPL_numroc Compute the local number of row/columns.

Synopsis

#include "hpl.h"

int HPL_numroc( const int N, const int INB, const int NB, const int PROC, const int SRCPROC, const int NPROCS );

Description

HPL_numroc returns the local number of matrix rows/columns process PROC will get if we give out N rows/columns starting from global index 0.

Arguments

N       (input)                       const int
        On entry, N  specifies the number of rows/columns being dealt
        out. N must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
PROC    (input)                       const int
        On entry, PROC specifies  the coordinate of the process whose
        local portion is determined.  PROC must be at least zero  and
        strictly less than NPROCS.
SRCPROC (input)                       const int
        On entry,  SRCPROC  specifies  the coordinate of the  process
        that possesses the first row or column of the matrix. SRCPROC
        must be at least zero and strictly less than NPROCS.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2l, HPL_indxg2lp, HPL_indxg2p, HPL_indxl2g, HPL_numrocI. hpcc-1.4.1/hpl/www/HPL_numrocI.html0000644000000000000000000000477211256503657013761 00000000000000 HPL_numrocI HPL 2.0 Library Functions September 10, 2008

Name

HPL_numrocI Compute the local number of row/columns.

Synopsis

#include "hpl.h"

int HPL_numrocI( const int N, const int I, const int INB, const int NB, const int PROC, const int SRCPROC, const int NPROCS );

Description

HPL_numrocI returns the local number of matrix rows/columns process PROC will get if we give out N rows/columns starting from global index I.

Arguments

N       (input)                       const int
        On entry, N  specifies the number of rows/columns being dealt
        out. N must be at least zero.
I       (input)                       const int
        On entry, I  specifies the global index of the matrix  entry.
        I must be at least zero.
INB     (input)                       const int
        On entry,  INB  specifies  the size of the first block of the
        global matrix. INB must be at least one.
NB      (input)                       const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
PROC    (input)                       const int
        On entry, PROC specifies  the coordinate of the process whose
        local portion is determined.  PROC must be at least zero  and
        strictly less than NPROCS.
SRCPROC (input)                       const int
        On entry,  SRCPROC  specifies  the coordinate of the  process
        that possesses the first row or column of the matrix. SRCPROC
        must be at least zero and strictly less than NPROCS.
NPROCS  (input)                       const int
        On entry,  NPROCS  specifies the total number of process rows
        or columns over which the matrix is distributed.  NPROCS must
        be at least one.

See Also

HPL_indxg2l, HPL_indxg2lp, HPL_indxg2p, HPL_indxl2g, HPL_numroc. hpcc-1.4.1/hpl/www/HPL_pabort.html0000644000000000000000000000266411256503657013632 00000000000000 HPL_pabort HPL 2.0 Library Functions September 10, 2008

Name

HPL_pabort halts execution.

Synopsis

#include "hpl.h"

void HPL_pabort( int LINE, const char * SRNAME, const char * FORM, ... );

Description

HPL_pabort displays an error message on stderr and halts execution.

Arguments

LINE    (local input)                 int
        On entry,  LINE  specifies the line  number in the file where
        the  error  has occurred.  When  LINE  is not a positive line
        number, it is ignored.
SRNAME  (local input)                 const char *
        On entry, SRNAME  should  be the name of the routine  calling
        this error handler.
FORM    (local input)                 const char *
        On entry, FORM specifies the format, i.e., how the subsequent
        arguments are converted for output.
        (local input)                 ...
        On entry,  ...  is the list of arguments to be printed within
        the format string.

See Also

HPL_fprintf, HPL_pwarn. hpcc-1.4.1/hpl/www/HPL_packL.html0000644000000000000000000000276611256503657013400 00000000000000 HPL_packL HPL 2.0 Library Functions September 10, 2008

Name

HPL_packL Form the MPI structure for the row ring broadcasts.

Synopsis

#include "hpl.h"

int HPL_packL( HPL_T_panel * PANEL, const int INDEX, const int LEN, const int IBUF );

Description

HPL_packL forms the MPI data type for the panel to be broadcast. Successful completion is indicated by the returned error code MPI_SUCCESS.

Arguments

PANEL   (input/output)                HPL_T_panel *
        On entry,  PANEL  points to the  current panel data structure
        being broadcast.
INDEX   (input)                       const int
        On entry,  INDEX  points  to  the  first entry of the  packed
        buffer being broadcast.
LEN     (input)                       const int
        On entry, LEN is the length of the packed buffer.
IBUF    (input)                       const int
        On entry, IBUF  specifies the panel buffer/count/type entries
        that should be initialized.

See Also

HPL_binit, HPL_bcast, HPL_bwait. hpcc-1.4.1/hpl/www/HPL_pddriver.html0000644000000000000000000000114411256503657014152 00000000000000 main HPL 2.0 Library Functions September 10, 2008

Name

main HPL main timing program.

Synopsis

#include "hpl.h"

int main();

Description

main is the main driver program for testing the HPL routines. This program is driven by a short data file named "HPL.dat".

See Also

HPL_pdinfo, HPL_pdtest. hpcc-1.4.1/hpl/www/HPL_pdfact.html0000644000000000000000000000605111256503657013576 00000000000000 HPL_pdfact HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdfact recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdfact( HPL_T_panel * PANEL );

Description

HPL_pdfact recursively factorizes a 1-dimensional panel of columns. The RPFACT function pointer specifies the recursive algorithm to be used, either Crout, Left- or Right looking. NBMIN allows one to vary the recursive stopping criterion in terms of the number of columns in the panel, and NDIV allows one to specify the number of subpanels each panel should be divided into. Usually a value of 2 will be chosen. Finally PFACT is a function pointer specifying the non-recursive algorithm to be used on at most NBMIN columns. One can also choose here between Crout, Left- or Right looking. Empirical tests seem to indicate that values of 4 or 8 for NBMIN give the best results. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT. hpcc-1.4.1/hpl/www/HPL_pdgesv.html0000644000000000000000000000303411256503657013623 00000000000000 HPL_pdgesv HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdgesv Solve A x = b.

Synopsis

#include "hpl.h"

void HPL_pdgesv( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A );

Description

HPL_pdgesv factors an N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with or without look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.

See Also

HPL_pdgesv0, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdtrsv. hpcc-1.4.1/hpl/www/HPL_pdgesv0.html0000644000000000000000000000353311256503657013707 00000000000000 HPL_pdgesv0 HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdgesv0 Factor an N x N+1 matrix.

Synopsis

#include "hpl.h"

void HPL_pdgesv0( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A );

Description

HPL_pdgesv0 factors an N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant without look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.

See Also

HPL_pdgesv, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdfact, HPL_binit, HPL_bcast, HPL_bwait, HPL_pdupdateNN, HPL_pdupdateNT, HPL_pdupdateTN, HPL_pdupdateTT. hpcc-1.4.1/hpl/www/HPL_pdgesvK1.html0000644000000000000000000000346111256503657014023 00000000000000 HPL_pdgesvK1 HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdgesvK1 Factor an N x N+1 matrix.

Synopsis

#include "hpl.h"

void HPL_pdgesvK1( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A );

Description

HPL_pdgesvK1 factors an N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.

See Also

HPL_pdgesv, HPL_pdgesvK2, HPL_pdfact, HPL_binit, HPL_bcast, HPL_bwait, HPL_pdupdateNN, HPL_pdupdateNT, HPL_pdupdateTN, HPL_pdupdateTT. hpcc-1.4.1/hpl/www/HPL_pdgesvK2.html0000644000000000000000000000353511256503657014026 00000000000000 HPL_pdgesvK2 HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdgesvK2 Factor an N x N+1 matrix.

Synopsis

#include "hpl.h"

void HPL_pdgesvK2( HPL_T_grid * GRID, HPL_T_palg * ALGO, HPL_T_pmat * A );

Description

HPL_pdgesvK2 factors an N-by-N+1 matrix using LU factorization with row partial pivoting. The main algorithm is the "right looking" variant with look-ahead. The lower triangular factor is left unpivoted and the pivots are not returned. The right hand side is the N+1 column of the coefficient matrix.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.

See Also

HPL_pdgesv, HPL_pdgesv0, HPL_pdgesvK1, HPL_pdfact, HPL_binit, HPL_bcast, HPL_bwait, HPL_pdupdateNN, HPL_pdupdateNT, HPL_pdupdateTN, HPL_pdupdateTT. hpcc-1.4.1/hpl/www/HPL_pdinfo.html0000644000000000000000000002101111256503657013605 00000000000000 HPL_pdinfo HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdinfo Read input parameter file.

Synopsis

#include "hpl.h"

void HPL_pdinfo( HPL_T_test * TEST, int * NS, int * N, int * NBS, int * NB, HPL_T_ORDER * PMAPPIN, int * NPQS, int * P, int * Q, int * NPFS, HPL_T_FACT * PF, int * NBMS, int * NBM, int * NDVS, int * NDV, int * NRFS, HPL_T_FACT * RF, int * NTPS, HPL_T_TOP * TP, int * NDHS, int * DH, HPL_T_SWAP * FSWAP, int * TSWAP, int * L1NOTRAN, int * UNOTRAN, int * EQUIL, int * ALIGN );

Description

HPL_pdinfo reads the startup information for the various tests and transmits it to all processes.

Arguments

TEST    (global output)               HPL_T_test *
        On entry, TEST  points to a testing data structure.  On exit,
        the fields of this data structure are initialized as follows:
        TEST->outfp  specifies the output file where the results will
        be printed.  It is only defined and used by  the process 0 of
        the grid.  TEST->thrsh specifies the  threshold value for the
        test ratio.  TEST->epsil is the relative machine precision of
        the distributed computer.  Finally  the test counters, kfail,
        kpass, kskip, ktest are initialized to zero.
NS      (global output)               int *
        On exit,  NS  specifies the number of different problem sizes
        to be tested. NS is less than or equal to HPL_MAX_PARAM.
N       (global output)               int *
        On entry, N is an array of dimension HPL_MAX_PARAM.  On exit,
        the first NS entries of this array contain the  problem sizes
        to run the code with.
NBS     (global output)               int *
        On exit,  NBS  specifies the number of different distribution
        blocking factors to be tested. NBS must be less than or equal
        to HPL_MAX_PARAM.
NB      (global output)               int *
        On entry, NB is an array of dimension HPL_MAX_PARAM. On exit,
        the first NBS entries of this array contain the values of the
        various distribution blocking factors, to run the code with.
PMAPPIN (global output)               HPL_T_ORDER *
        On exit,  PMAPPIN  specifies the process mapping onto the no-
        des of the  MPI machine configuration.  PMAPPIN  defaults  to
        row-major ordering.
NPQS    (global output)               int *
        On exit, NPQS  specifies the  number of different values that
        can be used for P and Q, i.e., the number of process grids to
        run  the  code with.  NPQS must be  less  than  or  equal  to
        HPL_MAX_PARAM.
P       (global output)               int *
        On entry, P  is an array of dimension HPL_MAX_PARAM. On exit,
        the first NPQS entries of this array contain the values of P,
        the number of process rows of the  NPQS grids to run the code
        with.
Q       (global output)               int *
        On entry, Q  is an array of dimension HPL_MAX_PARAM. On exit,
        the first NPQS entries of this array contain the values of Q,
        the number of process columns of the  NPQS  grids to  run the
        code with.
NPFS    (global output)               int *
        On exit, NPFS  specifies the  number of different values that
        can be used for PF : the panel factorization algorithm to run
        the code with. NPFS is less than or equal to HPL_MAX_PARAM.
PF      (global output)               HPL_T_FACT *
        On entry, PF is an array of dimension HPL_MAX_PARAM. On exit,
        the first  NPFS  entries  of this array  contain  the various
        panel factorization algorithms to run the code with.
NBMS    (global output)               int *
        On exit,  NBMS  specifies  the  number  of  various recursive
        stopping criteria  to be tested.  NBMS  must be  less than or
        equal to HPL_MAX_PARAM.
NBM     (global output)               int *
        On entry,  NBM  is an array of  dimension  HPL_MAX_PARAM.  On
        exit, the first NBMS entries of this array contain the values
        of the various recursive stopping criteria to be tested.
NDVS    (global output)               int *
        On exit,  NDVS  specifies  the number  of various numbers  of
        panels in recursion to be tested.  NDVS is less than or equal
        to HPL_MAX_PARAM.
NDV     (global output)               int *
        On entry,  NDV  is an array of  dimension  HPL_MAX_PARAM.  On
        exit, the first NDVS entries of this array contain the values
        of the various numbers of panels in recursion to be tested.
NRFS    (global output)               int *
        On exit, NRFS  specifies the  number of different values that
        can be used for RF : the recursive factorization algorithm to
        be tested. NRFS is less than or equal to HPL_MAX_PARAM.
RF      (global output)               HPL_T_FACT *
        On entry, RF is an array of dimension HPL_MAX_PARAM. On exit,
        the first  NRFS  entries  of  this array contain  the various
        recursive factorization algorithms to run the code with.
NTPS    (global output)               int *
        On exit, NTPS  specifies the  number of different values that
        can be used for the  broadcast topologies  to be tested. NTPS
        is less than or equal to HPL_MAX_PARAM.
TP      (global output)               HPL_T_TOP *
        On entry, TP is an array of dimension HPL_MAX_PARAM. On exit,
        the  first NTPS  entries of this  array  contain  the various
        broadcast (along rows) topologies to run the code with.
NDHS    (global output)               int *
        On exit, NDHS  specifies the  number of different values that
        can be used for the  lookahead depths to be  tested.  NDHS is
        less than or equal to HPL_MAX_PARAM.
DH      (global output)               int *
        On entry,  DH  is  an array of  dimension  HPL_MAX_PARAM.  On
        exit, the first NDHS entries of this array contain the values
        of lookahead depths to run the code with.  Each such value is
        either 0 (no lookahead) or greater than zero.
FSWAP   (global output)               HPL_T_SWAP *
        On exit, FSWAP specifies the swapping algorithm to be used in
        all tests.
TSWAP   (global output)               int *
        On exit,  TSWAP  specifies the swapping threshold as a number
        of columns when the mixed swapping algorithm was chosen.
L1NOTRAN (global output)              int *
        On exit, L1NOTRAN specifies whether the upper triangle of the
        panels of columns  should  be stored  in  no-transposed  form
        (L1NOTRAN=1) or in transposed form (L1NOTRAN=0).
UNOTRAN (global output)               int *
        On exit, UNOTRAN  specifies whether the panels of rows should
        be stored in  no-transposed form  (UNOTRAN=1)  or  transposed
        form (UNOTRAN=0) during their broadcast.
EQUIL   (global output)               int *
        On exit,  EQUIL  specifies  whether  equilibration during the
        swap-broadcast  of  the  panel of rows  should  be  performed
        (EQUIL=1) or not (EQUIL=0).
ALIGN   (global output)               int *
        On exit,  ALIGN  specifies the alignment  of  the dynamically
        allocated buffers in double precision words. ALIGN is greater
        than zero.

See Also

HPL_pddriver, HPL_pdtest. hpcc-1.4.1/hpl/www/HPL_pdlamch.html0000644000000000000000000000542711256503657013753 00000000000000 HPL_pdlamch HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlamch determines machine-specific arithmetic constants.

Synopsis

#include "hpl.h"

double HPL_pdlamch( MPI_Comm COMM, const HPL_T_MACH CMACH );

Description

HPL_pdlamch determines machine-specific arithmetic constants such as the relative machine precision (eps), the safe minimum (sfmin) such that 1/sfmin does not overflow, the base of the machine (base), the precision (prec), the number of (base) digits in the mantissa (t), whether rounding occurs in addition (rnd = 1.0 and 0.0 otherwise), the minimum exponent before (gradual) underflow (emin), the underflow threshold (rmin) = base**(emin-1), the largest exponent before overflow (emax), the overflow threshold (rmax) = (base**emax)*(1-eps).

Arguments

COMM    (global/local input)          MPI_Comm
        The MPI communicator identifying the process collection.
CMACH   (global input)                const HPL_T_MACH
        Specifies the value to be returned by HPL_pdlamch            
           = HPL_MACH_EPS,   HPL_pdlamch := eps (default)            
           = HPL_MACH_SFMIN, HPL_pdlamch := sfmin                    
           = HPL_MACH_BASE,  HPL_pdlamch := base                     
           = HPL_MACH_PREC,  HPL_pdlamch := eps*base                 
           = HPL_MACH_MLEN,  HPL_pdlamch := t                        
           = HPL_MACH_RND,   HPL_pdlamch := rnd                      
           = HPL_MACH_EMIN,  HPL_pdlamch := emin                     
           = HPL_MACH_RMIN,  HPL_pdlamch := rmin                     
           = HPL_MACH_EMAX,  HPL_pdlamch := emax                     
           = HPL_MACH_RMAX,  HPL_pdlamch := rmax                     
         
        where                                                        
         
           eps   = relative machine precision,                       
           sfmin = safe minimum,                                     
           base  = base of the machine,                              
           prec  = eps*base,                                         
           t     = number of digits in the mantissa,                 
           rnd   = 1.0 if rounding occurs in addition,               
           emin  = minimum exponent before underflow,                
           rmin  = underflow threshold,                              
           emax  = largest exponent before overflow,                 
           rmax  = overflow threshold.
hpcc-1.4.1/hpl/www/HPL_pdlange.html0000644000000000000000000000511011256503657013742 00000000000000 HPL_pdlange HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlange Compute ||A||.

Synopsis

#include "hpl.h"

double HPL_pdlange( const HPL_T_grid * GRID, const HPL_T_NORM NORM, const int M, const int N, const int NB, const double * A, const int LDA );

Description

HPL_pdlange returns the value of the one norm, or the infinity norm, or the element of largest absolute value of a distributed matrix A: max(abs(A(i,j))) when NORM = HPL_NORM_A, norm1(A), when NORM = HPL_NORM_1, normI(A), when NORM = HPL_NORM_I, where norm1 denotes the one norm of a matrix (maximum column sum) and normI denotes the infinity norm of a matrix (maximum row sum). Note that max(abs(A(i,j))) is not a matrix norm.

Arguments

GRID    (local input)                 const HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
NORM    (global input)                const HPL_T_NORM
        On entry,  NORM  specifies  the  value to be returned by this
        function as described above.
M       (global input)                const int
        On entry,  M  specifies  the number  of rows of the matrix A.
        M must be at least zero.
N       (global input)                const int
        On entry,  N specifies the number of columns of the matrix A.
        N must be at least zero.
NB      (global input)                const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix. NB must be larger than one.
A       (local input)                 const double *
        On entry,  A  points to an array of dimension  (LDA,LocQ(N)),
        that contains the local pieces of the distributed matrix A.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,LocP(M)).

See Also

HPL_pdlaprnt, HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_pdlaprnt.html0000644000000000000000000000531311256503657014161 00000000000000 HPL_pdlaprnt HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlaprnt Print a distributed matrix A.

Synopsis

#include "hpl.h"

void HPL_pdlaprnt( const HPL_T_grid * GRID, const int M, const int N, const int NB, double * A, const int LDA, const int IAROW, const int IACOL, const char * CMATNM );

Description

HPL_pdlaprnt prints to standard error a distributed matrix A. The local pieces of A are sent to the process of coordinates (0,0) in the grid and then printed.

Arguments

GRID    (local input)                 const HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
M       (global input)                const int
        On entry,  M  specifies the number of rows of the coefficient
        matrix A. M must be at least zero.
N       (global input)                const int
        On  entry,   N   specifies  the  number  of  columns  of  the
        coefficient matrix A. N must be at least zero.
NB      (global input)                const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix. NB must be larger than one.
A       (local input)                 double *
        On entry,  A  points to an  array of dimension (LDA,LocQ(N)).
        This array contains the coefficient matrix to be printed.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,LocP(M)).
IAROW   (global input)                const int
        On entry,  IAROW  specifies the row process coordinate owning
        the  first row of A.  IAROW  must be  larger than or equal to
        zero and less than NPROW.
IACOL   (global input)                const int
        On entry,  IACOL  specifies  the  column  process  coordinate
        owning the  first column  of A. IACOL  must be larger than or
        equal to zero and less than NPCOL.
CMATNM  (global input)                const char *
        On entry, CMATNM is the name of the matrix to be printed.

See Also

HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_pdlaswp00N.html0000644000000000000000000000534711256503657014274 00000000000000 HPL_pdlaswp00N HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlaswp00N Broadcast a column panel L and swap the row panel U.

Synopsis

#include "hpl.h"

void HPL_pdlaswp00N( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdlaswp00N applies the NB row interchanges to NN columns of the trailing submatrix and broadcasts a column panel. Bi-directional exchange is used to perform the swap :: broadcast of the row panel U at once, resulting in a lower number of messages than usual as well as a lower communication volume. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: log_2(P) * (lat + NB*LocQ(N) / bdwth) where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. Mono-directional links will double this communication cost.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be broadcast and swapped) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to  be swapped and broadcast starting at
        the current position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesvK2, HPL_pdupdateNN, HPL_pdupdateTN, HPL_pipid, HPL_plindx0, HPL_dlaswp01N, HPL_dlaswp02N, HPL_dlaswp03N, HPL_dlaswp04N, HPL_dlaswp05N. hpcc-1.4.1/hpl/www/HPL_pdlaswp00T.html0000644000000000000000000000534711256503657014302 00000000000000 HPL_pdlaswp00T HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlaswp00T Broadcast a column panel L and swap the row panel U.

Synopsis

#include "hpl.h"

void HPL_pdlaswp00T( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdlaswp00T applies the NB row interchanges to NN columns of the trailing submatrix and broadcasts a column panel. Bi-directional exchange is used to perform the swap :: broadcast of the row panel U at once, resulting in a lower number of messages than usual as well as a lower communication volume. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: log_2(P) * (lat + NB*LocQ(N) / bdwth) where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. Mono-directional links will double this communication cost.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be broadcast and swapped) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to  be swapped and broadcast starting at
        the current position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesvK2, HPL_pdupdateNT, HPL_pdupdateTT, HPL_pipid, HPL_plindx0, HPL_dlaswp01T, HPL_dlaswp02N, HPL_dlaswp03T, HPL_dlaswp04T, HPL_dlaswp05T. hpcc-1.4.1/hpl/www/HPL_pdlaswp01N.html0000644000000000000000000000565311256503657014275 00000000000000 HPL_pdlaswp01N HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlaswp01N Broadcast a column panel L and swap the row panel U.

Synopsis

#include "hpl.h"

void HPL_pdlaswp01N( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdlaswp01N applies the NB row interchanges to NN columns of the trailing submatrix and broadcasts a column panel. A "Spread then roll" algorithm performs the swap :: broadcast of the row panel U at once, resulting in a minimal communication volume and a "very good" use of the connectivity if available. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. K is a constant in (2,3] that depends on the achieved bandwidth during a simultaneous message exchange between two processes. An empirical optimistic value of K is typically 2.4.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to  be swapped and broadcast starting at
        the current position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesvK2, HPL_pdupdateNN, HPL_pdupdateTN, HPL_pipid, HPL_plindx1, HPL_plindx10, HPL_spreadN, HPL_equil, HPL_rollN, HPL_dlaswp00N, HPL_dlaswp01N, HPL_dlaswp06N. hpcc-1.4.1/hpl/www/HPL_pdlaswp01T.html0000644000000000000000000000565311256503657014303 00000000000000 HPL_pdlaswp01T HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdlaswp01T Broadcast a column panel L and swap the row panel U.

Synopsis

#include "hpl.h"

void HPL_pdlaswp01T( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdlaswp01T applies the NB row interchanges to NN columns of the trailing submatrix and broadcasts a column panel. A "Spread then roll" algorithm performs the swap :: broadcast of the row panel U at once, resulting in a minimal communication volume and a "very good" use of the connectivity if available. With P process rows and assuming bi-directional links, the running time of this function can be approximated by: (log_2(P)+(P-1)) * lat + K * NB * LocQ(N) / bdwth where NB is the number of rows of the row panel U, N is the global number of columns being updated, lat and bdwth are the latency and bandwidth of the network for double precision real words. K is a constant in (2,3] that depends on the achieved bandwidth during a simultaneous message exchange between two processes. An empirical optimistic value of K is typically 2.4.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to  be swapped and broadcast starting at
        the current position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesvK2, HPL_pdupdateNT, HPL_pdupdateTT, HPL_pipid, HPL_plindx1, HPL_plindx10, HPL_spreadT, HPL_equil, HPL_rollT, HPL_dlaswp10N, HPL_dlaswp01T, HPL_dlaswp06T. hpcc-1.4.1/hpl/www/HPL_pdmatgen.html0000644000000000000000000000473111256503657014137 00000000000000 HPL_pdmatgen HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdmatgen Parallel random matrix generator.

Synopsis

#include "hpl.h"

void HPL_pdmatgen( const HPL_T_grid * GRID, const int M, const int N, const int NB, double * A, const int LDA, const int ISEED );

Description

HPL_pdmatgen generates (or regenerates) a parallel random matrix A. The pseudo-random generator uses the linear congruential algorithm: X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer Programming, Knuth 1973, Vol. 2.

Arguments

GRID    (local input)                 const HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
M       (global input)                const int
        On entry,  M  specifies  the number  of rows of the matrix A.
        M must be at least zero.
N       (global input)                const int
        On entry,  N specifies the number of columns of the matrix A.
        N must be at least zero.
NB      (global input)                const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.
A       (local output)                double *
        On entry,  A  points  to an array of dimension (LDA,LocQ(N)).
        On exit, this array contains the coefficients of the randomly
        generated matrix.
LDA     (local input)                 const int
        On entry, LDA specifies the leading dimension of the array A.
        LDA must be at least max(1,LocP(M)).
ISEED   (global input)                const int
        On entry, ISEED  specifies  the  seed  number to generate the
        matrix A. ISEED must be at least zero.

See Also

HPL_ladd, HPL_lmul, HPL_setran, HPL_xjumpm, HPL_jumpit, HPL_drand. hpcc-1.4.1/hpl/www/HPL_pdmxswp.html0000644000000000000000000000636211256503657014044 00000000000000 HPL_pdmxswp HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdmxswp swaps and broadcasts the pivot row.

Synopsis

#include "hpl.h"

void HPL_pdmxswp( HPL_T_panel * PANEL, const int M, const int II, const int JJ, double * WORK );

Description

HPL_pdmxswp swaps and broadcasts the absolute value max row using bi-directional exchange. The buffer is partially set by HPL_dlocmax. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by log_2( P ) * ( lat + ( 2 * N0 + 4 ) / bdwth ) where lat and bdwth are the latency and bandwidth of the network for double precision real elements. Communication only occurs in one process column. Mono-directional links will cause the communication cost to double.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of the matrix
        column on which this function operates.
II      (local input)                 const int
        On entry, II  specifies the row offset where the column to be
        operated on starts with respect to the panel.
JJ      (local input)                 const int
        On entry, JJ  specifies the column offset where the column to
        be operated on starts with respect to the panel.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2 * (4+2*N0).
        It  is assumed that  HPL_dlocmax  was called  prior  to  this
        routine to  initialize  the first four entries of this array.
        On exit, the  N0  length max row is stored in WORK[4:4+N0-1];
        Note that this is also the  JJth  row  (or column) of L1. The
        remaining part is used as a temporary array.

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdpancrN.html0000644000000000000000000000705411256503657014106 00000000000000 HPL_pdpancrN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpancrN Crout panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpancrN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpancrN factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT. hpcc-1.4.1/hpl/www/HPL_pdpancrT.html0000644000000000000000000000700011256503657014103 00000000000000 HPL_pdpancrT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpancrT Crout panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpancrT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpancrT factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows to bring the current column only once through cache at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT. hpcc-1.4.1/hpl/www/HPL_pdpanel_disp.html0000644000000000000000000000174211256503657015001 00000000000000 HPL_pdpanel_disp HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanel_disp Deallocate a panel data structure.

Synopsis

#include "hpl.h"

int HPL_pdpanel_disp( HPL_T_panel * * PANEL );

Description

HPL_pdpanel_disp deallocates the panel structure and resources and stores the error code returned by the panel factorization.

Arguments

PANEL   (local input/output)          HPL_T_panel * *
        On entry,  PANEL  points  to  the  address  of the panel data
        structure to be deallocated.

See Also

HPL_pdpanel_new, HPL_pdpanel_init, HPL_pdpanel_free. hpcc-1.4.1/hpl/www/HPL_pdpanel_free.html0000644000000000000000000000173111256503657014761 00000000000000 HPL_pdpanel_free HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanel_free Deallocate the panel resources.

Synopsis

#include "hpl.h"

int HPL_pdpanel_free( HPL_T_panel * PANEL );

Description

HPL_pdpanel_free deallocates the panel resources and stores the error code returned by the panel factorization.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points  to  the  panel data  structure from
        which the resources should be deallocated.

See Also

HPL_pdpanel_new, HPL_pdpanel_init, HPL_pdpanel_disp. hpcc-1.4.1/hpl/www/HPL_pdpanel_init.html0000644000000000000000000000533611256503657015010 00000000000000 HPL_pdpanel_init HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanel_init Initialize the panel resources.

Synopsis

#include "hpl.h"

void HPL_pdpanel_init( HPL_T_grid * GRID, HPL_T_palg * ALGO, const int M, const int N, const int JB, HPL_T_pmat * A, const int IA, const int JA, const int TAG, HPL_T_panel * PANEL );

Description

HPL_pdpanel_init initializes a panel data structure.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
M       (local input)                 const int
        On entry, M specifies the global number of rows of the panel.
        M must be at least zero.
N       (local input)                 const int
        On entry,  N  specifies  the  global number of columns of the
        panel and trailing submatrix. N must be at least zero.
JB      (global input)                const int
        On entry,  JB  specifies the number of columns of the panel.
        JB must be at least zero.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.
IA      (global input)                const int
        On entry,  IA  is  the global row index identifying the panel
        and trailing submatrix. IA must be at least zero.
JA      (global input)                const int
        On entry, JA is the global column index identifying the panel
        and trailing submatrix. JA must be at least zero.
TAG     (global input)                const int
        On entry, TAG is the row broadcast message id.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.

See Also

HPL_pdpanel_new, HPL_pdpanel_disp, HPL_pdpanel_free. hpcc-1.4.1/hpl/www/HPL_pdpanel_new.html0000644000000000000000000000537211256503657014636 00000000000000 HPL_pdpanel_new HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanel_new Create a panel data structure.

Synopsis

#include "hpl.h"

void HPL_pdpanel_new( HPL_T_grid * GRID, HPL_T_palg * ALGO, const int M, const int N, const int JB, HPL_T_pmat * A, const int IA, const int JA, const int TAG, HPL_T_panel * * PANEL );

Description

HPL_pdpanel_new creates and initializes a panel data structure.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters.
M       (local input)                 const int
        On entry, M specifies the global number of rows of the panel.
        M must be at least zero.
N       (local input)                 const int
        On entry,  N  specifies  the  global number of columns of the
        panel and trailing submatrix. N must be at least zero.
JB      (global input)                const int
        On entry, JB specifies the number of columns of the panel.
        JB must be at least zero.
A       (local input/output)          HPL_T_pmat *
        On entry, A points to the data structure containing the local
        array information.
IA      (global input)                const int
        On entry,  IA  is  the global row index identifying the panel
        and trailing submatrix. IA must be at least zero.
JA      (global input)                const int
        On entry, JA is the global column index identifying the panel
        and trailing submatrix. JA must be at least zero.
TAG     (global input)                const int
        On entry, TAG is the row broadcast message id.
PANEL   (local input/output)          HPL_T_panel * *
        On entry,  PANEL  points  to  the  address  of the panel data
        structure to create and initialize.

See Also

HPL_pdpanel_init, HPL_pdpanel_disp, HPL_pdpanel_free. hpcc-1.4.1/hpl/www/HPL_pdpanllN.html0000644000000000000000000000707111256503657014110 00000000000000 HPL_pdpanllN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanllN Left-looking panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpanllN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpanllN factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Left-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows the current column to be brought through cache only once at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT. hpcc-1.4.1/hpl/www/HPL_pdpanllT.html0000644000000000000000000000701511256503657014114 00000000000000 HPL_pdpanllT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanllT Left-looking panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpanllT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpanllT factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Left-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows the current column to be brought through cache only once at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanrlN, HPL_pdpanrlT. hpcc-1.4.1/hpl/www/HPL_pdpanrlN.html0000644000000000000000000000707311256503657014120 00000000000000 HPL_pdpanrlN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanrlN Right-looking panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpanrlN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpanrlN factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Right-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows the current column to be brought through cache only once at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlT. hpcc-1.4.1/hpl/www/HPL_pdpanrlT.html0000644000000000000000000000701711256503657014124 00000000000000 HPL_pdpanrlT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdpanrlT Right-looking panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdpanrlT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdpanrlT factorizes a panel of columns that is a sub-array of a larger one-dimensional panel A using the Right-looking variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost. Note that one iteration of the main loop is unrolled. The local computation of the absolute value max of the next column is performed just after its update by the current column. This allows the current column to be brought through cache only once at each step. The current implementation does not perform any blocking for this sequence of BLAS operations, however the design allows for plugging in an optimal (machine-specific) specialized BLAS-like kernel. This idea has been suggested to us by Fred Gustavson, IBM T.J. Watson Research Center.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN. hpcc-1.4.1/hpl/www/HPL_pdrpancrN.html0000644000000000000000000000653411256503657014272 00000000000000 HPL_pdrpancrN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpancrN Crout recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpancrN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpancrN recursively factorizes a panel of columns using the recursive Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdrpancrT.html0000644000000000000000000000644211256503657014276 00000000000000 HPL_pdrpancrT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpancrT Crout recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpancrT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpancrT recursively factorizes a panel of columns using the recursive Crout variant of the usual one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdrpanllN.html0000644000000000000000000000652511256503657014275 00000000000000 HPL_pdrpanllN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpanllN Left-looking recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpanllN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpanllN recursively factorizes a panel of columns using the recursive Left-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdrpanllT.html0000644000000000000000000000645111256503657014301 00000000000000 HPL_pdrpanllT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpanllT Left-looking recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpanllT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpanllT recursively factorizes a panel of columns using the recursive Left-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanrlN, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdrpanrlN.html0000644000000000000000000000652611256503657014304 00000000000000 HPL_pdrpanrlN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpanrlN Right-looking recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpanrlN( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpanrlN recursively factorizes a panel of columns using the recursive Right-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in no-transpose form (i.e. just like the input matrix itself). Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlT, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdrpanrlT.html0000644000000000000000000000645211256503657014310 00000000000000 HPL_pdrpanrlT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdrpanrlT Right-looking recursive panel factorization.

Synopsis

#include "hpl.h"

void HPL_pdrpanrlT( HPL_T_panel * PANEL, const int M, const int N, const int ICOFF, double * WORK );

Description

HPL_pdrpanrlT recursively factorizes a panel of columns using the recursive Right-looking variant of the one-dimensional algorithm. The lower triangular N0-by-N0 upper block of the panel is stored in transpose form. Bi-directional exchange is used to perform the swap::broadcast operations at once for one column in the panel. This results in a lower number of slightly larger messages than usual. On P processes and assuming bi-directional links, the running time of this function can be approximated by (when N is equal to N0): N0 * log_2( P ) * ( lat + ( 2*N0 + 4 ) / bdwth ) + N0^2 * ( M - N0/3 ) * gam2-3 where M is the local number of rows of the panel, lat and bdwth are the latency and bandwidth of the network for double precision real words, and gam2-3 is an estimate of the Level 2 and Level 3 BLAS rate of execution. The recursive algorithm allows indeed to almost achieve Level 3 BLAS performance in the panel factorization. On a large number of modern machines, this operation is however latency bound, meaning that its cost can be estimated by only the latency portion N0 * log_2(P) * lat. Mono-directional links will double this communication cost.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
M       (local input)                 const int
        On entry,  M specifies the local number of rows of sub(A).
N       (local input)                 const int
        On entry,  N specifies the local number of columns of sub(A).
ICOFF   (global input)                const int
        On entry, ICOFF specifies the row and column offset of sub(A)
        in A.
WORK    (local workspace)             double *
        On entry, WORK  is a workarray of size at least 2*(4+2*N0).

See Also

HPL_dlocmax, HPL_dlocswpN, HPL_dlocswpT, HPL_pdmxswp, HPL_pdpancrN, HPL_pdpancrT, HPL_pdpanllN, HPL_pdpanllT, HPL_pdpanrlN, HPL_pdpanrlT, HPL_pdrpancrN, HPL_pdrpancrT, HPL_pdrpanllN, HPL_pdrpanllT, HPL_pdrpanrlN, HPL_pdfact. hpcc-1.4.1/hpl/www/HPL_pdtest.html0000644000000000000000000000533111256503657013640 00000000000000 HPL_pdtest HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdtest Perform one test.

Synopsis

#include "hpl.h"

void HPL_pdtest( HPL_T_test * TEST, HPL_T_grid * GRID, HPL_T_palg * ALGO, const int N, const int NB );

Description

HPL_pdtest performs one test given a set of parameters such as the process grid, the problem size, the distribution blocking factor ... This function generates the data, calls and times the linear system solver, checks the accuracy of the obtained vector solution and writes this information to the file pointed to by TEST->outfp.

Arguments

TEST    (global input)                HPL_T_test *
        On entry,  TEST  points  to a testing data structure:  outfp
        specifies the output file where the results will be printed.
        It is only defined and used by the process  0  of the  grid.
        thrsh  specifies  the  threshold  value  for the test ratio.
        Concretely, a test is declared "PASSED"  if and only if  the
        following inequality is satisfied:
        ||Ax-b||_oo / ( epsil *
                        ( || x ||_oo * || A ||_oo + || b ||_oo ) *
                         N )  < thrsh.
        epsil  is the  relative machine precision of the distributed
        computer. Finally the test counters, kfail, kpass, kskip and
        ktest are updated as follows:  if the test passes,  kpass is
        incremented by one;  if the test fails, kfail is incremented
        by one; if the test is skipped, kskip is incremented by one.
        ktest is left unchanged.
GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
ALGO    (global input)                HPL_T_palg *
        On entry,  ALGO  points to  the data structure containing the
        algorithmic parameters to be used for this test.
N       (global input)                const int
        On entry,  N specifies the order of the coefficient matrix A.
        N must be at least zero.
NB      (global input)                const int
        On entry,  NB specifies the blocking factor used to partition
        and distribute the matrix A. NB must be larger than one.

See Also

HPL_pddriver, HPL_pdinfo. hpcc-1.4.1/hpl/www/HPL_pdtrsv.html0000644000000000000000000000415711256503657013664 00000000000000 HPL_pdtrsv HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdtrsv Solve triu( A ) x = b.

Synopsis

#include "hpl.h"

void HPL_pdtrsv( HPL_T_grid * GRID, HPL_T_pmat * AMAT );

Description

HPL_pdtrsv solves an upper triangular system of linear equations. The rhs is the last column of the N by N+1 matrix A. The solve starts in the process column owning the Nth column of A, so the rhs b may need to be moved one process column to the left at the beginning. The routine therefore needs a column vector in every process column but the one owning b. The result is replicated in all process rows, and returned in XR, i.e. XR is of size nq = LOCq( N ) in all processes. The algorithm uses decreasing one-ring broadcast in process rows and columns implemented in terms of synchronous communication point to point primitives. The lookahead of depth 1 is used to minimize the critical path. This entire operation is essentially ``latency'' bound and an estimate of its running time is given by: (move rhs) lat + N / ( P bdwth ) + (solve) ((N / NB)-1) 2 (lat + NB / bdwth) + gam2 N^2 / ( P Q ), where gam2 is an estimate of the Level 2 BLAS rate of execution. There are N / NB diagonal blocks. One must exchange 2 messages of length NB to compute the next NB entries of the vector solution, as well as performing a total of N^2 floating point operations.

Arguments

GRID    (local input)                 HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
AMAT    (local input/output)          HPL_T_pmat *
        On entry,  AMAT  points  to the data structure containing the
        local array information.

See Also

HPL_pdgesv. hpcc-1.4.1/hpl/www/HPL_pdupdateNN.html0000644000000000000000000000363011256503657014377 00000000000000 HPL_pdupdateNN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdupdateNN Broadcast a panel and update the trailing submatrix.

Synopsis

#include "hpl.h"

void HPL_pdupdateNN( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdupdateNN broadcasts / forwards the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing submatrix (using the panel PANEL).

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local output)                int *
        On exit,  IFLAG  indicates  whether or not  the broadcast has
        been completed when PBCST is not NULL on entry. In that case,
        IFLAG is left unchanged.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be updated) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to be updated  starting  at the  current
        position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesv0, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdlaswp00N, HPL_pdlaswp01N. hpcc-1.4.1/hpl/www/HPL_pdupdateNT.html0000644000000000000000000000363011256503657014405 00000000000000 HPL_pdupdateNT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdupdateNT Broadcast a panel and update the trailing submatrix.

Synopsis

#include "hpl.h"

void HPL_pdupdateNT( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdupdateNT broadcasts / forwards the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing submatrix (using the panel PANEL).

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local output)                int *
        On exit,  IFLAG  indicates  whether or not  the broadcast has
        been completed when PBCST is not NULL on entry. In that case,
        IFLAG is left unchanged.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be updated) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to be updated  starting  at the  current
        position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesv0, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdlaswp00T, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_pdupdateTN.html0000644000000000000000000000363011256503657014405 00000000000000 HPL_pdupdateTN HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdupdateTN Broadcast a panel and update the trailing submatrix.

Synopsis

#include "hpl.h"

void HPL_pdupdateTN( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdupdateTN broadcasts / forwards the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing submatrix (using the panel PANEL).

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local output)                int *
        On exit,  IFLAG  indicates  whether or not  the broadcast has
        been completed when PBCST is not NULL on entry. In that case,
        IFLAG is left unchanged.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be updated) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to be updated  starting  at the  current
        position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesv0, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdlaswp00N, HPL_pdlaswp01N. hpcc-1.4.1/hpl/www/HPL_pdupdateTT.html0000644000000000000000000000363011256503657014413 00000000000000 HPL_pdupdateTT HPL 2.0 Library Functions September 10, 2008

Name

HPL_pdupdateTT Broadcast a panel and update the trailing submatrix.

Synopsis

#include "hpl.h"

void HPL_pdupdateTT( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int NN );

Description

HPL_pdupdateTT broadcasts / forwards the panel PBCST and simultaneously applies the row interchanges and updates part of the trailing submatrix (using the panel PANEL).

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local output)                int *
        On exit,  IFLAG  indicates  whether or not  the broadcast has
        been completed when PBCST is not NULL on entry. In that case,
        IFLAG is left unchanged.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be updated) information.
NN      (local input)                 const int
        On entry, NN specifies  the  local  number  of columns of the
        trailing  submatrix  to be updated  starting  at the  current
        position. NN must be at least zero.

See Also

HPL_pdgesv, HPL_pdgesv0, HPL_pdgesvK1, HPL_pdgesvK2, HPL_pdlaswp00T, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_perm.html0000644000000000000000000000411611256503657013300 00000000000000 HPL_perm HPL 2.0 Library Functions September 10, 2008

Name

HPL_perm Combine 2 index arrays - Generate the permutation.

Synopsis

#include "hpl.h"

void HPL_perm( const int N, int * LINDXA, int * LINDXAU, int * IWORK );

Description

HPL_perm combines two index arrays and generates the corresponding permutation. First, this function computes the inverse of LINDXA, and then combines it with LINDXAU. Second, in order to be able to perform the permutation in place, LINDXAU is overwritten by the sequence of permutation producing the same result. What we ultimately want to achieve is: U[LINDXAU[i]] := U[LINDXA[i]] for i in [0..N). After the call to this function, this in place permutation can be performed by for i in [0..N) swap U[i] with U[LINDXAU[i]].

Arguments

N       (global input)                const int
        On entry,  N  specifies the length of the arrays  LINDXA  and
        LINDXAU. N should be at least zero.
LINDXA  (global input/output)         int *
        On entry,  LINDXA  is an array of dimension N  containing the
        source indexes. On exit,  LINDXA  contains the combined index
        array.
LINDXAU (global input/output)         int *
        On entry,  LINDXAU is an array of dimension N  containing the
        target indexes.  On exit,  LINDXAU  contains  the sequence of
        permutation,  that  should be applied  in increasing order to
        permute the underlying array U in place.
IWORK   (workspace)                   int *
        On entry, IWORK is a workarray of dimension N.

See Also

HPL_plindx1, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_pipid.html0000644000000000000000000000714011256503657013442 00000000000000 HPL_pipid HPL 2.0 Library Functions September 10, 2008

Name

HPL_pipid Simplify the pivot vector.

Synopsis

#include "hpl.h"

void HPL_pipid( HPL_T_panel * PANEL, int * K, int * IPID );

Description

HPL_pipid computes an array IPID that contains the source and final destination of matrix rows resulting from the application of N interchanges as computed by the LU factorization with row partial pivoting. The array IPID is such that the row of global index IPID(i) should be mapped onto the row of global index IPID(i+1). Note that we cannot really know the length of IPID a priori. However, we know that this array is at least 2*N long, since there are N rows to swap and broadcast. The length of this array must be smaller than or equal to 4*N, since every row is swapped with at most a single distinct remote row. The algorithm constructing IPID goes as follows: Let IA be the global index of the first row to be swapped. For every row src IA + i with i in [0..N) to be swapped with row dst such that dst is given by DPIV[i]: Is row src the destination of a previous row of the current block, that is, is there k odd such that IPID(k) is equal to src ? Yes: update this destination with dst. For example, if the pivot array is (0,2)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in fact row 0 and 5, i.e., row 0 goes to 5 and not 2 as it was thought so far ... No : add the pair (src,dst) at the end of IPID; row src has not been moved yet. Is row dst different from src the destination of a previous row of the current block, i.e., is there k odd such that IPID(k) is equal to dst ? Yes: update IPID(k) with src. For example, if the pivot array is (0,5)(1,1)(2,5) ... , then when we swap rows 2 and 5, we swap in fact row 2 and 0, i.e., row 0 goes to 2 and not 5 as it was thought so far ... No : add the pair (dst,src) at the end of IPID; row dst has not been moved yet. Note that when src is equal to dst, the pair (dst,src) should not be added to IPID in order to avoid duplicated entries in this array. During the construction of the array IPID, we make sure that the first N entries are such that IPID(k) with k odd is equal to IA+k/2. 
For k in [0..K/2), the row of global index IPID(2*k) should be mapped onto the row of global index IPID(2*k+1).

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
K       (global output)               int *
        On exit, K specifies the number of entries in  IPID.  K is at
        least 2*N, and at most 4*N.
IPID    (global output)               int *
        On entry, IPID is an array of length 4*N.  On exit, the first
        K entries of that array contain the src and final destination
        resulting  from  the  application of the  N  interchanges  as
        specified by  DPIV.  The  pairs  (src,dst)  are  contiguously
        stored and sorted so that IPID(2*i+1) is equal to IA+i with i
        in [0..N)

See Also

HPL_pdlaswp00N, HPL_pdlaswp00T, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_plindx0.html0000644000000000000000000001653311256503657013721 00000000000000 HPL_plindx0 HPL 2.0 Library Functions September 10, 2008

Name

HPL_plindx0 Compute local swapping index arrays.

Synopsis

#include "hpl.h"

void HPL_plindx0( HPL_T_panel * PANEL, const int K, int * IPID, int * LINDXA, int * LINDXAU, int * LLEN );

Description

HPL_plindx0 computes two local arrays LINDXA and LINDXAU containing the local source and final destination position resulting from the application of row interchanges. On entry, the array IPID of length K is such that the row of global index IPID(i) should be mapped onto row of global index IPID(i+1). Let IA be the global index of the first row to be swapped. For k in [0..K/2), the row of global index IPID(2*k) should be mapped onto the row of global index IPID(2*k+1). The question, then, is to determine which rows should ultimately be part of U. First, some rows of the process ICURROW may be swapped locally. One of these rows belongs to U, the other one belongs to my local piece of A. The other rows of the current block are swapped with remote rows and are thus not part of U. These rows however should be sent along, and grabbed by the other processes as we progress in the exchange phase. So, assume that I am ICURROW and consider a row of index IPID(2*i) that I own. If I own IPID(2*i+1) as well and IPID(2*i+1) - IA is less than N, this row is locally swapped and should be copied into U at the position IPID(2*i+1) - IA. No row will be exchanged for this one. If IPID(2*i+1)-IA is greater than N, then the row IPID(2*i) should be locally copied into my local piece of A at the position corresponding to the row of global index IPID(2*i+1). If the process ICURROW does not own IPID(2*i+1), then row IPID(2*i) is to be swapped away and strictly speaking does not belong to U, but to A remotely. Since this process will however send this array U, this row is copied into U, exactly where the row IPID(2*i+1) should go. For this, we search IPID for k1, such that IPID(2*k1) is equal to IPID(2*i+1); and row IPID(2*i) is to be copied in U at the position IPID(2*k1+1)-IA. It is thus important to put the rows that go into U, i.e., such that IPID(2*i+1) - IA is less than N at the beginning of the array IPID. By doing so, U is formed, and the local copy is performed in just one sweep.
Two lists LINDXA and LINDXAU are built. LINDXA contains the local index of the rows I have that should be copied. LINDXAU contains the local destination information: if LINDXAU(k) >= 0, row LINDXA(k) of A is to be copied in U at position LINDXAU(k). Otherwise, row LINDXA(k) of A should be locally copied into A(-LINDXAU(k),:). In the process ICURROW, the initial packing algorithm proceeds as follows. for all entries in IPID, if IPID(2*i) is in ICURROW, if IPID(2*i+1) is in ICURROW, if( IPID(2*i+1) - IA < N ) save corresponding local position of this row (LINDXA); save local position (LINDXAU) in U where this row goes; [copy row IPID(2*i) in U at position IPID(2*i+1)-IA; ]; else save corresponding local position of this row (LINDXA); save local position (-LINDXAU) in A where this row goes; [copy row IPID(2*i) in my piece of A at IPID(2*i+1);] end if else find k1 such that IPID(2*k1) = IPID(2*i+1); copy row IPID(2*i) in U at position IPID(2*k1+1)-IA; save corresponding local position of this row (LINDXA); save local position (LINDXAU) in U where this row goes; end if end if end for Second, if I am not the current row process ICURROW, all source rows in IPID that I own are part of U. Indeed, they are swapped with one row of the current block of rows, and the main factorization algorithm proceeds one row after each other. The processes different from ICURROW, should exchange and accumulate those rows until they receive some data previously owned by the process ICURROW. In processes different from ICURROW, the initial packing algorithm proceeds as follows. Consider a row of global index IPID(2*i) that I own. When I will be receiving data previously owned by ICURROW, i.e., U, row IPID(2*i) should replace the row in U at pos. IPID(2*i+1)-IA, and this particular row of U should be first copied into my piece of A, at A(il,:), where il is the local row index corresponding to IPID(2*i). 
Now, initially, this row will be packed into workspace, say as the kth row of that work array. The following algorithm sets LINDXAU[k] to IPID(2*i+1)-IA, that is the position in U where the row should be copied. LINDXA(k) stores the local index in A where this row of U should be copied, i.e il. for all entries in IPID, if IPID(2*i) is not in ICURROW, copy row IPID(2*i) in work array; save corresponding local position of this row (LINDXA); save position (LINDXAU) in U where this row should be copied; end if end for Since we are at it, we also globally figure out how many rows every process has. That is necessary, because it would be rather cumbersome to figure it on the fly during the bi-directional exchange phase. This information is kept in the array LLEN of size NPROW. Also note that the arrays LINDXA and LINDXAU are of max length equal to 2*N.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
K       (global input)                const int
        On entry, K specifies the number of entries in IPID.  K is at
        least 2*N, and at most 4*N.
IPID    (global input)                int *
        On entry,  IPID  is an array of length K. The first K entries
        of that array contain the src and final destination resulting
        from the application of the interchanges.
LINDXA  (local output)                int *
        On entry, LINDXA  is an array of dimension 2*N. On exit, this
        array contains the local indexes of the rows of A I have that
        should be copied into U.
LINDXAU (local output)                int *
        On entry, LINDXAU is an array of dimension 2*N. On exit, this
        array contains  the local destination  information encoded as
        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
        of A should be locally copied into A(-LINDXAU(k),:).
LLEN    (global output)               int *
        On entry,  LLEN  is  an array  of length  NPROW.  On exit, it
        contains how many rows every process has.

See Also

HPL_pdlaswp00N, HPL_pdlaswp00T, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_plindx1.html0000644000000000000000000001075011256503657013715 00000000000000 HPL_plindx1 HPL 2.0 Library Functions September 10, 2008

Name

HPL_plindx1 Compute local swapping index arrays.

Synopsis

#include "hpl.h"

void HPL_plindx1( HPL_T_panel * PANEL, const int K, const int * IPID, int * IPA, int * LINDXA, int * LINDXAU, int * IPLEN, int * IPMAP, int * IPMAPM1, int * PERMU, int * IWORK );

Description

HPL_plindx1 computes two local arrays LINDXA and LINDXAU containing the local source and final destination position resulting from the application of row interchanges. In addition, this function computes three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic mapping information for the spreading phase.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
K       (global input)                const int
        On entry, K specifies the number of entries in IPID.  K is at
        least 2*N, and at most 4*N.
IPID    (global input)                const int *
        On entry,  IPID  is an array of length K. The first K entries
        of that array contain the src and final destination resulting
        from the application of the interchanges.
IPA     (global output)               int *
        On exit,  IPA  specifies  the number of rows that the current
        process row has that either belong to U  or should be swapped
        with remote rows of A.
LINDXA  (global output)               int *
        On entry, LINDXA  is an array of dimension 2*N. On exit, this
        array contains the local indexes of the rows of A I have that
        should be copied into U.
LINDXAU (global output)               int *
        On entry, LINDXAU is an array of dimension 2*N. On exit, this
        array contains  the local destination  information encoded as
        follows.  If LINDXAU(k) >= 0, row  LINDXA(k)  of A  is  to be
        copied in U at position LINDXAU(k).  Otherwise, row LINDXA(k)
        of A should be locally copied into A(-LINDXAU(k),:).
IPLEN   (global output)               int *
        On entry, IPLEN is an array of dimension NPROW + 1. On  exit,
        this array is such that  IPLEN[i]  is the number of rows of A
        in  the  processes  before  process  IPMAP[i]  after the sort
        with the convention that IPLEN[nprow]  is the total number of
        rows of the panel.  In other words IPLEN[i+1]-IPLEN[i] is the
        local number of rows of A that should be moved to the process
        IPMAP[i]. IPLEN is such that the number of rows of the source
        process  row can be computed as  IPLEN[1] - IPLEN[0], and the
        remaining  entries  of  this  array  are  sorted  so that the
        quantities IPLEN[i+1] - IPLEN[i] are logarithmically sorted.
IPMAP   (global output)               int *
        On entry, IPMAP is an array of dimension NPROW. On exit, this
        array contains  the logarithmic mapping of the processes.  In
        other words, IPMAP[myrow] is the corresponding sorted process
        coordinate.
IPMAPM1 (global output)               int *
        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
        this  array  contains  the inverse of the logarithmic mapping
        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
        [0.. NPROW)
PERMU   (global output)               int *
        On entry,  PERMU  is an array of dimension JB. On exit, PERMU
        contains  a sequence of permutations,  that should be applied
        in increasing order to permute in place the row panel U.
IWORK   (workspace)                   int *
        On entry, IWORK is a workarray of dimension 2*JB.

See Also

HPL_pdlaswp00N, HPL_pdlaswp00T, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_plindx10.html0000644000000000000000000000561611256503657014002 00000000000000 HPL_plindx10 HPL 2.0 Library Functions September 10, 2008

Name

HPL_plindx10 Compute the logarithmic maps for the spreading.

Synopsis

#include "hpl.h"

void HPL_plindx10( HPL_T_panel * PANEL, const int K, const int * IPID, int * IPLEN, int * IPMAP, int * IPMAPM1 );

Description

HPL_plindx10 computes three arrays IPLEN, IPMAP and IPMAPM1 that contain the logarithmic mapping information for the spreading phase.

Arguments

PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel information.
K       (global input)                const int
        On entry, K specifies the number of entries in IPID.  K is at
        least 2*N, and at most 4*N.
IPID    (global input)                const int *
        On entry,  IPID  is an array of length K. The first K entries
        of that array contain the src and final destination resulting
        from the application of the interchanges.
IPLEN   (global output)               int *
        On entry, IPLEN  is an array of dimension NPROW + 1. On exit,
        this array is such that  IPLEN[i]  is the number of rows of A
        in the processes before process IPMAP[i] after the sort, with
        the convention that IPLEN[nprow] is the total number of rows.
        In other words,  IPLEN[i+1] - IPLEN[i] is the local number of
        rows of  A  that should be moved for each process.  IPLEN  is
        such that the number of rows of the source process row can be
        computed as IPLEN[1] - IPLEN[0], and the remaining entries of
        this  array are sorted  so  that  the quantities IPLEN[i+1] -
        IPLEN[i] are logarithmically sorted.
IPMAP   (global output)               int *
        On entry, IPMAP is an array of dimension NPROW. On exit, this
        array contains  the logarithmic mapping of the processes.  In
        other words, IPMAP[myrow] is the corresponding sorted process
        coordinate.
IPMAPM1 (global output)               int *
        On entry, IPMAPM1  is an array of dimension NPROW.  On  exit,
        this  array  contains  the inverse of the logarithmic mapping
        contained  in  IPMAP:  IPMAPM1[ IPMAP[i] ] = i,  for all i in
        [0.. NPROW)

See Also

HPL_pdlaswp00N, HPL_pdlaswp00T, HPL_pdlaswp01N, HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_pnum.html0000644000000000000000000000271611256503657013320 00000000000000 HPL_pnum HPL 2.0 Library Functions September 10, 2008

Name

HPL_pnum Rank determination.

Synopsis

#include "hpl.h"

int HPL_pnum( const HPL_T_grid * GRID, const int MYROW, const int MYCOL );

Description

HPL_pnum determines the rank of a process as a function of its coordinates in the grid.

Arguments

GRID    (local input)                 const HPL_T_grid *
        On entry,  GRID  points  to the data structure containing the
        process grid information.
MYROW   (local input)                 const int
        On entry,  MYROW  specifies the row coordinate of the process
        whose rank is to be determined. MYROW must be greater than or
        equal to zero and less than NPROW.
MYCOL   (local input)                 const int
        On entry,  MYCOL  specifies  the  column  coordinate  of  the
        process whose rank is to be determined. MYCOL must be greater
        than or equal to zero and less than NPCOL.

See Also

HPL_grid_init, HPL_grid_info, HPL_grid_exit. hpcc-1.4.1/hpl/www/HPL_ptimer.html0000644000000000000000000000276711256503657013647 00000000000000 HPL_ptimer HPL 2.0 Library Functions September 10, 2008

Name

HPL_ptimer Timer facility.

Synopsis

#include "hpl.h"

void HPL_ptimer( const int I );

Description

HPL_ptimer provides a "stopwatch" functionality cpu/wall timer in seconds. Up to 64 separate timers can be functioning at once. The first call starts the timer, and the second stops it. This routine can be disabled by calling HPL_ptimer_disable(), so that calls to the timer are ignored. This feature can be used to make sure certain sections of code do not affect timings, even if they call routines which have HPL_ptimer calls in them. HPL_ptimer_enable() will enable the timer functionality. One can retrieve the current value of a timer by calling t0 = HPL_ptimer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) where I is the timer index in [0..64). To initialize the timer functionality, one must have called HPL_ptimer_boot() prior to any of the functions mentioned above.

Arguments

I       (global input)                const int
        On entry, I specifies the timer to stop/start.

See Also

HPL_ptimer_cputime, HPL_ptimer_walltime. hpcc-1.4.1/hpl/www/HPL_ptimer_cputime.html0000644000000000000000000000226311256503657015364 00000000000000 HPL_ptimer_cputime HPL 2.0 Library Functions September 10, 2008

Name

HPL_ptimer_cputime Return the CPU time.

Synopsis

#include "hpl.h"

double HPL_ptimer_cputime();

Description

HPL_ptimer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, the clock() function is used to return an approximation of processor time used by the program. The value returned is the CPU time used so far as a clock_t; to get the number of seconds used, the result is divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C standard library. If HPL_USE_TIMES is defined, the times() function is used instead. This function returns the current process times. times() returns the number of clock ticks that have elapsed since the system has been up. Otherwise and by default, the standard library function getrusage() is used.

See Also

HPL_ptimer_walltime, HPL_ptimer. hpcc-1.4.1/hpl/www/HPL_ptimer_walltime.html0000644000000000000000000000115411256503657015532 00000000000000 HPL_ptimer_walltime HPL 2.0 Library Functions September 10, 2008

Name

HPL_ptimer_walltime Return the elapsed (wall-clock) time.

Synopsis

#include "hpl.h"

double HPL_ptimer_walltime();

Description

HPL_ptimer_walltime returns the elapsed (wall-clock) time.

See Also

HPL_ptimer_cputime, HPL_ptimer. hpcc-1.4.1/hpl/www/HPL_pwarn.html0000644000000000000000000000306711256503657013470 00000000000000 HPL_pwarn HPL 2.0 Library Functions September 10, 2008

Name

HPL_pwarn displays an error message.

Synopsis

#include "hpl.h"

void HPL_pwarn( FILE * STREAM, int LINE, const char * SRNAME, const char * FORM, ... );

Description

HPL_pwarn displays an error message.

Arguments

STREAM  (local input)                 FILE *
        On entry, STREAM specifies the output stream.
LINE    (local input)                 int
        On entry,  LINE  specifies the line  number in the file where
        the  error  has occurred.  When  LINE  is not a positive line
        number, it is ignored.
SRNAME  (local input)                 const char *
        On entry, SRNAME  should  be the name of the routine  calling
        this error handler.
FORM    (local input)                 const char *
        On entry, FORM specifies the format, i.e., how the subsequent
        arguments are converted for output.
        (local input)                 ...
        On entry,  ...  is the list of arguments to be printed within
        the format string.

See Also

HPL_pabort, HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_rand.html0000644000000000000000000000251411256503657013261 00000000000000 HPL_rand HPL 2.0 Library Functions September 10, 2008

Name

HPL_rand random number generator.

Synopsis

#include "hpl.h"

double HPL_rand();

Description

HPL_rand generates the next number in the random sequence. This function ensures that this number lies in the interval (-0.5, 0.5]. The static array irand contains the information (2 integers) required to generate the next number in the sequence X(n). This number is computed as X(n) = (2^32 * irand[1] + irand[0]) / d - 0.5, where the constant d is the largest 64 bit positive integer. The array irand is then updated for the generation of the next number X(n+1) in the random sequence as follows X(n+1) = a * X(n) + c. The constants a and c should have been preliminarily stored in the arrays ias and ics as 2 pairs of integers. The initialization of ias, ics and irand is performed by the function HPL_setran.

See Also

HPL_ladd, HPL_lmul, HPL_setran, HPL_xjumpm, HPL_jumpit. hpcc-1.4.1/hpl/www/HPL_recv.html0000644000000000000000000000343511256503657013277 00000000000000 HPL_recv HPL 2.0 Library Functions September 10, 2008

Name

HPL_recv Receive a message.

Synopsis

#include "hpl.h"

int HPL_recv( double * RBUF, int RCOUNT, int SRC, int RTAG, MPI_Comm COMM );

Description

HPL_recv is a simple wrapper around MPI_Recv. Its main purpose is to allow for some experimentation / tuning of this simple routine. Successful completion is indicated by the returned error code HPL_SUCCESS. In the case of messages of length less than or equal to zero, this function returns immediately.

Arguments

RBUF    (local output)                double *
        On entry, RBUF specifies the starting address of buffer to be
        received.
RCOUNT  (local input)                 int
        On entry,  RCOUNT  specifies  the number  of double precision
        entries in RBUF. RCOUNT must be at least zero.
SRC     (local input)                 int
        On entry, SRC  specifies the rank of the  sending  process in
        the communication space defined by COMM.
RTAG    (local input)                 int
        On entry,  RTAG specifies the message tag to be used for this
        communication operation.
COMM    (local input)                 MPI_Comm
        The MPI communicator identifying the communication space.

See Also

HPL_send, HPL_sendrecv. hpcc-1.4.1/hpl/www/HPL_reduce.html0000644000000000000000000000421311256503657013602 00000000000000 HPL_reduce HPL 2.0 Library Functions September 10, 2008

Name

HPL_reduce Reduce operation.

Synopsis

#include "hpl.h"

int HPL_reduce( void * BUFFER, const int COUNT, const HPL_T_TYPE DTYPE, const HPL_T_OP OP, const int ROOT, MPI_Comm COMM );

Description

HPL_reduce performs a global reduce operation across all processes of a group. Note that the input buffer is used as a work array, so in all processes but the accumulating process the original data is corrupted.

Arguments

BUFFER  (local input/output)          void *
        On entry,  BUFFER  points to  the  buffer to be  reduced.  On
        exit,  and  in process of rank  ROOT  this array contains the
        reduced data.  This  buffer  is also used as workspace during
        the operation in the other processes of the group.
COUNT   (global input)                const int
        On entry,  COUNT  indicates the number of entries in  BUFFER.
        COUNT must be at least zero.
DTYPE   (global input)                const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.
OP      (global input)                const HPL_T_OP 
        On entry, OP is a pointer to the local combine function.
ROOT    (global input)                const int
        On entry, ROOT is the coordinate of the accumulating process.
COMM    (global/local input)          MPI_Comm
        The MPI communicator identifying the process collection.

See Also

HPL_broadcast, HPL_all_reduce, HPL_barrier, HPL_min, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_rollN.html0000644000000000000000000000573711256503657013435 00000000000000 HPL_rollN HPL 2.0 Library Functions September 10, 2008

Name

HPL_rollN Roll U and forward the column panel.

Synopsis

#include "hpl.h"

void HPL_rollN( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int N, double * U, const int LDU, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 );

Description

HPL_rollN rolls the local arrays containing the local pieces of U, so that on exit from this function U is replicated in every process row. In addition, this function probes for the presence of the column panel and forwards it when available.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be rolled) information.
N       (local input)                 const int
        On entry, N specifies the number of columns of  U.  N must be
        at least zero.
U       (local input/output)          double *
        On entry,  U  is an array of dimension (LDU,*) containing the
        local pieces of U in each process row.
LDU     (local input)                 const int
        On entry, LDU specifies the local leading dimension of U. LDU
        should be at least  MAX(1,IPLEN[NPROW]).
IPLEN   (global input)                const int *
        On entry, IPLEN is an array of dimension NPROW+1.  This array
        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
        in each process row.
IPMAP   (global input)                const int *
        On entry, IPMAP is an array of dimension  NPROW.  This  array
        contains  the  logarithmic mapping of the processes. In other
        words, IPMAP[myrow]  is the absolute coordinate of the sorted
        process.
IPMAPM1 (global input)                const int *
        On entry,  IPMAPM1 is an array of dimension NPROW. This array
        contains  the inverse of the logarithmic mapping contained in
        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.

See Also

HPL_pdlaswp01N. hpcc-1.4.1/hpl/www/HPL_rollT.html0000644000000000000000000000572711256503657013442 00000000000000 HPL_rollT HPL 2.0 Library Functions September 10, 2008

Name

HPL_rollT Roll U and forward the column panel.

Synopsis

#include "hpl.h"

void HPL_rollT( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const int N, double * U, const int LDU, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 );

Description

HPL_rollT rolls the local arrays containing the local pieces of U, so that on exit from this function U is replicated in every process row. In addition, this function probes for the presence of the column panel and forwards it when available.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be rolled) information.
N       (local input)                 const int
        On entry, N specifies the local number of rows of  U.  N must
        be at least zero.
U       (local input/output)          double *
        On entry,  U  is an array of dimension (LDU,*) containing the
        local pieces of U in each process row.
LDU     (local input)                 const int
        On entry, LDU specifies the local leading dimension of U. LDU
        should be at least  MAX(1,N).
IPLEN   (global input)                const int *
        On entry, IPLEN is an array of dimension NPROW+1.  This array
        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
        in each process row.
IPMAP   (global input)                const int *
        On entry, IPMAP is an array of dimension  NPROW.  This  array
        contains  the  logarithmic mapping of the processes. In other
        words, IPMAP[myrow]  is the absolute coordinate of the sorted
        process.
IPMAPM1 (global input)                const int *
        On entry,  IPMAPM1 is an array of dimension NPROW. This array
        contains  the inverse of the logarithmic mapping contained in
        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.

See Also

HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_sdrv.html0000644000000000000000000000457511256503657013324 00000000000000 HPL_sdrv HPL 2.0 Library Functions September 10, 2008

Name

HPL_sdrv Send and receive a message.

Synopsis

#include "hpl.h"

int HPL_sdrv( double * SBUF, int SCOUNT, int STAG, double * RBUF, int RCOUNT, int RTAG, int PARTNER, MPI_Comm COMM );

Description

HPL_sdrv is a simple wrapper around MPI_Sendrecv. Its main purpose is to allow for some experimentation and tuning of this simple function. Messages of length less than or equal to zero are neither sent nor received. Successful completion is indicated by the returned error code HPL_SUCCESS.

Arguments

SBUF    (local input)                 double *
        On entry, SBUF specifies the starting address of buffer to be
        sent.
SCOUNT  (local input)                 int
        On entry,  SCOUNT  specifies  the number  of double precision
        entries in SBUF. SCOUNT must be at least zero.
STAG    (local input)                 int
        On entry,  STAG  specifies the message tag to be used for the
        sending communication operation.
RBUF    (local output)                double *
        On entry, RBUF specifies the starting address of buffer to be
        received.
RCOUNT  (local input)                 int
        On entry,  RCOUNT  specifies  the number  of double precision
        entries in RBUF. RCOUNT must be at least zero.
RTAG    (local input)                 int
        On entry,  RTAG  specifies the message tag to be used for the
        receiving communication operation.
PARTNER (local input)                 int
        On entry,  PARTNER  specifies  the rank of the  collaborative
        process in the communication space defined by COMM.
COMM    (local input)                 MPI_Comm
        The MPI communicator identifying the communication space.

See Also

HPL_send, HPL_recv. hpcc-1.4.1/hpl/www/HPL_send.html0000644000000000000000000000342711256503657013272 00000000000000 HPL_send HPL 2.0 Library Functions September 10, 2008

Name

HPL_send Send a message.

Synopsis

#include "hpl.h"

int HPL_send( double * SBUF, int SCOUNT, int DEST, int STAG, MPI_Comm COMM );

Description

HPL_send is a simple wrapper around MPI_Send. Its main purpose is to allow for some experimentation / tuning of this simple routine. Successful completion is indicated by the returned error code HPL_SUCCESS. In the case of messages of length less than or equal to zero, this function returns immediately.

Arguments

SBUF    (local input)                 double *
        On entry, SBUF specifies the starting address of buffer to be
        sent.
SCOUNT  (local input)                 int
        On entry,  SCOUNT  specifies  the number of  double precision
        entries in SBUF. SCOUNT must be at least zero.
DEST    (local input)                 int
        On entry, DEST specifies the rank of the receiving process in
        the communication space defined by COMM.
STAG    (local input)                 int
        On entry,  STAG specifies the message tag to be used for this
        communication operation.
COMM    (local input)                 MPI_Comm
        The MPI communicator identifying the communication space.

See Also

HPL_recv, HPL_sendrecv. hpcc-1.4.1/hpl/www/HPL_setran.html0000644000000000000000000000310011256503657013621 00000000000000 HPL_setran HPL 2.0 Library Functions September 10, 2008

Name

HPL_setran Manage the random number generator.

Synopsis

#include "hpl.h"

void HPL_setran( const int OPTION, int * IRAN );

Description

HPL_setran initializes the random generator with the encoding of the first number X(0) in the sequence, and the constants a and c used to compute the next element in the sequence: X(n+1) = a*X(n) + c. X(0), a and c are stored in the static variables irand, ias and ics. When OPTION is 0 (resp. 1 and 2), irand (resp. ias and ics) is set to the values of the input array IRAN. When OPTION is 3, IRAN is set to the current value of irand, and irand is then incremented.

Arguments

OPTION  (local input)                 const int
        On entry, OPTION  is an integer that specifies the operations
        to be performed on the random generator as specified above.
IRAN    (local input/output)          int *
        On entry,  IRAN is an array of dimension 2, that contains the
        16-lower and 15-higher bits of a random number.

See Also

HPL_ladd, HPL_lmul, HPL_xjumpm, HPL_jumpit, HPL_rand. hpcc-1.4.1/hpl/www/HPL_spreadN.html0000644000000000000000000000765111256503657013740 00000000000000 HPL_spreadN HPL 2.0 Library Functions September 10, 2008

Name

HPL_spreadN Spread row panel U and forward current column panel.

Synopsis

#include "hpl.h"

void HPL_spreadN( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const enum HPL_SIDE SIDE, const int N, double * U, const int LDU, const int SRCDIST, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 );

Description

HPL_spreadN spreads the local array containing local pieces of U, so that on exit to this function, a piece of U is contained in every process row. The array IPLEN contains the number of rows of U, that should be spread on any given process row. This function also probes for the presence of the column panel PBCST. In case of success, this panel will be forwarded. If PBCST is NULL on input, this probing mechanism will be disabled.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be spread) information.
SIDE    (global input)                const enum HPL_SIDE
        On entry, SIDE specifies whether the local piece of U located
        in process IPMAP[SRCDIST] should be spread to the right or to
        the left. This feature is used by the equilibration process.
N       (global input)                const int
        On entry,  N  specifies  the  local number of columns of U. N
        must be at least zero.
U       (local input/output)          double *
        On entry,  U  is an array of dimension (LDU,*) containing the
        local pieces of U.
LDU     (local input)                 const int
        On entry, LDU specifies the local leading dimension of U. LDU
        should be at least MAX(1,IPLEN[nprow]).
SRCDIST (local input)                 const int
        On entry,  SRCDIST  specifies the source process that spreads
        its piece of U.
IPLEN   (global input)                const int *
        On entry, IPLEN is an array of dimension NPROW+1.  This array
        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
        in each process before process IPMAP[i], with the  convention
        that IPLEN[nprow] is the total number of rows. In other words
        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
        should be moved to process IPMAP[i].
IPMAP   (global input)                const int *
        On entry, IPMAP is an array of dimension  NPROW.  This  array
        contains  the  logarithmic mapping of the processes. In other
        words, IPMAP[myrow]  is the absolute coordinate of the sorted
        process.
IPMAPM1 (global input)                const int *
        On entry,  IPMAPM1 is an array of dimension NPROW. This array
        contains  the inverse of the logarithmic mapping contained in
        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.

See Also

HPL_pdlaswp01N. hpcc-1.4.1/hpl/www/HPL_spreadT.html0000644000000000000000000000764211256503657013746 00000000000000 HPL_spreadT HPL 2.0 Library Functions September 10, 2008

Name

HPL_spreadT Spread row panel U and forward current column panel.

Synopsis

#include "hpl.h"

void HPL_spreadT( HPL_T_panel * PBCST, int * IFLAG, HPL_T_panel * PANEL, const enum HPL_SIDE SIDE, const int N, double * U, const int LDU, const int SRCDIST, const int * IPLEN, const int * IPMAP, const int * IPMAPM1 );

Description

HPL_spreadT spreads the local array containing local pieces of U, so that on exit to this function, a piece of U is contained in every process row. The array IPLEN contains the number of columns of U, that should be spread on any given process row. This function also probes for the presence of the column panel PBCST. If available, this panel will be forwarded. If PBCST is NULL on input, this probing mechanism will be disabled.

Arguments

PBCST   (local input/output)          HPL_T_panel *
        On entry,  PBCST  points to the data structure containing the
        panel (to be broadcast) information.
IFLAG   (local input/output)          int *
        On entry, IFLAG  indicates  whether or not  the broadcast has
        already been completed.  If not,  probing will occur, and the
        outcome will be contained in IFLAG on exit.
PANEL   (local input/output)          HPL_T_panel *
        On entry,  PANEL  points to the data structure containing the
        panel (to be spread) information.
SIDE    (global input)                const enum HPL_SIDE
        On entry, SIDE specifies whether the local piece of U located
        in process IPMAP[SRCDIST] should be spread to the right or to
        the left. This feature is used by the equilibration process.
N       (global input)                const int
        On entry,  N  specifies the local number of rows of U. N must
        be at least zero.
U       (local input/output)          double *
        On entry,  U  is an array of dimension (LDU,*) containing the
        local pieces of U.
LDU     (local input)                 const int
        On entry, LDU specifies the local leading dimension of U. LDU
        should be at least MAX(1,N).
SRCDIST (local input)                 const int
        On entry,  SRCDIST  specifies the source process that spreads
        its piece of U.
IPLEN   (global input)                const int *
        On entry, IPLEN is an array of dimension NPROW+1.  This array
        is such that IPLEN[i+1] - IPLEN[i] is the number of rows of U
        in each process before process IPMAP[i], with the  convention
        that IPLEN[nprow] is the total number of rows. In other words
        IPLEN[i+1] - IPLEN[i]  is  the local number of rows of U that
        should be moved to process IPMAP[i].
IPMAP   (global input)                const int *
        On entry, IPMAP is an array of dimension  NPROW.  This  array
        contains  the  logarithmic mapping of the processes. In other
        words, IPMAP[myrow]  is the absolute coordinate of the sorted
        process.
IPMAPM1 (global input)                const int *
        On entry,  IPMAPM1 is an array of dimension NPROW. This array
        contains  the inverse of the logarithmic mapping contained in
        IPMAP: For i in [0.. NPROW) IPMAPM1[IPMAP[i]] = i.

See Also

HPL_pdlaswp01T. hpcc-1.4.1/hpl/www/HPL_sum.html0000644000000000000000000000311611256503657013140 00000000000000 HPL_sum HPL 2.0 Library Functions September 10, 2008

Name

HPL_sum Combine (sum) two buffers.

Synopsis

#include "hpl.h"

void HPL_sum( const int N, const void * IN, void * INOUT, const HPL_T_TYPE DTYPE );

Description

HPL_sum combines (sum) two buffers.

Arguments

N       (input)                       const int
        On entry, N  specifies  the  length  of  the  buffers  to  be
        combined. N must be at least zero.
IN      (input)                       const void *
        On entry, IN points to the input-only buffer to be combined.
INOUT   (input/output)                void *
        On entry, INOUT  points  to  the  input-output  buffer  to be
        combined.  On exit,  the  entries of this array  contain  the
        combined results.
DTYPE   (input)                       const HPL_T_TYPE
        On entry,  DTYPE  specifies the type of the buffers operands.

See Also

HPL_broadcast, HPL_reduce, HPL_all_reduce, HPL_barrier, HPL_min, HPL_max, HPL_sum. hpcc-1.4.1/hpl/www/HPL_timer.html0000644000000000000000000000275711256503657013466 00000000000000 HPL_timer HPL 2.0 Library Functions September 10, 2008

Name

HPL_timer Timer facility.

Synopsis

#include "hpl.h"

void HPL_timer( const int I );

Description

HPL_timer provides a "stopwatch" functionality cpu/wall timer in seconds. Up to 64 separate timers can be functioning at once. The first call starts the timer, and the second stops it. This routine can be disabled by calling HPL_timer_disable(), so that calls to the timer are ignored. This feature can be used to make sure certain sections of code do not affect timings, even if they call routines which have HPL_timer calls in them. HPL_timer_enable() will re-enable the timer functionality. One can retrieve the current value of a timer by calling t0 = HPL_timer_inquire( HPL_WALL_TIME | HPL_CPU_TIME, I ) where I is the timer index in [0..64). To initialize the timer functionality, one must have called HPL_timer_boot() prior to any of the functions mentioned above.

Arguments

I       (global input)                const int
        On entry, I specifies the timer to stop/start.

See Also

HPL_timer_cputime, HPL_timer_walltime. hpcc-1.4.1/hpl/www/HPL_timer_cputime.html0000644000000000000000000000225411256503657015204 00000000000000 HPL_timer_cputime HPL 2.0 Library Functions September 10, 2008

Name

HPL_timer_cputime Return the CPU time.

Synopsis

#include "hpl.h"

double HPL_timer_cputime();

Description

HPL_timer_cputime returns the cpu time. If HPL_USE_CLOCK is defined, the clock() function is used to return an approximation of processor time used by the program. The value returned is the CPU time used so far as a clock_t; to get the number of seconds used, the result is divided by CLOCKS_PER_SEC. This function is part of the ANSI/ISO C standard library. If HPL_USE_TIMES is defined, the times() function is used instead. This function returns the current process times. times() returns the number of clock ticks that have elapsed since the system has been up. Otherwise and by default, the standard library function getrusage() is used.

See Also

HPL_timer_walltime, HPL_timer. hpcc-1.4.1/hpl/www/HPL_timer_walltime.html0000644000000000000000000000114411256503657015351 00000000000000 HPL_timer_walltime HPL 2.0 Library Functions September 10, 2008

Name

HPL_timer_walltime Return the elapsed (wall-clock) time.

Synopsis

#include "hpl.h"

double HPL_timer_walltime();

Description

HPL_timer_walltime returns the elapsed (wall-clock) time.

See Also

HPL_timer_cputime, HPL_timer. hpcc-1.4.1/hpl/www/HPL_warn.html0000644000000000000000000000336511256503657013311 00000000000000 HPL_warn HPL 2.0 Library Functions September 10, 2008

Name

HPL_warn displays an error message.

Synopsis

#include "hpl.h"

void HPL_warn( FILE * STREAM, int LINE, const char * SRNAME, const char * FORM, ... );

Description

HPL_warn displays an error message.

Arguments

STREAM  (local input)                 FILE *
        On entry, STREAM specifies the output stream.
LINE    (local input)                 int
        On entry,  LINE  specifies the line  number in the file where
        the  error  has occurred.  When  LINE  is not a positive line
        number, it is ignored.
SRNAME  (local input)                 const char *
        On entry, SRNAME  should  be the name of the routine  calling
        this error handler.
FORM    (local input)                 const char *
        On entry, FORM specifies the format, i.e., how the subsequent
        arguments are converted for output.
        (local input)                 ...
        On entry,  ...  is the list of arguments to be printed within
        the format string.

Example

#include "hpl.h"

int main(int argc, char *argv[])
{
   HPL_warn( stderr, __LINE__, __FILE__,
             "Demo.\n" );
   exit(0); return(0);
}

See Also

HPL_abort, HPL_fprintf. hpcc-1.4.1/hpl/www/HPL_xjumpm.html0000644000000000000000000000640311256503657013656 00000000000000 HPL_xjumpm HPL 2.0 Library Functions September 10, 2008

Name

HPL_xjumpm Compute constants to jump in the random sequence.

Synopsis

#include "hpl.h"

void HPL_xjumpm( const int JUMPM, int * MULT, int * IADD, int * IRANN, int * IRANM, int * IAM, int * ICM );

Description

HPL_xjumpm computes the constants A and C to jump JUMPM numbers in the random sequence: X(n+JUMPM) = A*X(n)+C. The constants encoded in MULT and IADD specify how to jump from one entry in the sequence to the next.

Arguments

JUMPM   (local input)                 const int
        On entry,  JUMPM  specifies  the  number  of entries  in  the
        sequence to jump over. When JUMPM is less than or equal to zero,
        A and C are not computed, IRANM is set to IRANN corresponding
        to a jump of size zero.
MULT    (local input)                 int *
        On entry, MULT is an array of dimension 2,  that contains the
        16-lower  and 15-higher bits of the constant  a  to jump from
        X(n) to X(n+1) = a*X(n) + c in the random sequence.
IADD    (local input)                 int *
        On entry, IADD is an array of dimension 2,  that contains the
        16-lower  and 15-higher bits of the constant  c  to jump from
        X(n) to X(n+1) = a*X(n) + c in the random sequence.
IRANN   (local input)                 int *
        On entry, IRANN is an array of dimension 2, that contains the
        16-lower and 15-higher bits of the encoding of X(n).
IRANM   (local output)                int *
        On entry,  IRANM  is an array of dimension 2.   On exit, this
        array  contains respectively  the 16-lower and 15-higher bits
        of the encoding of X(n+JUMPM).
IAM     (local output)                int *
        On entry, IAM is an array of dimension 2. On exit, when JUMPM
        is  greater  than  zero,  this  array  contains  the  encoded
        constant  A  to jump from  X(n) to  X(n+JUMPM)  in the random
        sequence. IAM(0:1)  contains  respectively  the  16-lower and
        15-higher bits of this constant  A.  When JUMPM is less than or
        equal to zero, this array is not referenced.
ICM     (local output)                int *
        On entry, ICM is an array of dimension 2. On exit, when JUMPM
        is  greater  than  zero,  this  array  contains  the  encoded
        constant  C  to jump from  X(n)  to  X(n+JUMPM) in the random
        sequence. ICM(0:1)  contains  respectively  the  16-lower and
        15-higher bits of this constant  C.  When JUMPM is less than or
        equal to zero, this array is not referenced.

See Also

HPL_ladd, HPL_lmul, HPL_setran, HPL_jumpit, HPL_rand. hpcc-1.4.1/hpl/www/algorithm.html0000644000000000000000000003376711256503657013636 00000000000000 HPL Algorithm

HPL Algorithm

This page provides a high-level description of the algorithm used in this package. As indicated below, HPL contains in fact many possible variants for various operations. Defaults could have been chosen, or even variants could be selected during the execution. Due to the performance requirements, it was decided to leave the user with the opportunity of choosing, so that an "optimal" set of parameters could easily be experimentally determined for a given machine configuration. From a numerical accuracy point of view, all possible combinations are rigorously equivalent to each other even though the result may slightly differ (bit-wise).


Main Algorithm

This software package solves a linear system of order n: A x = b by first computing the LU factorization with row partial pivoting of the n-by-n+1 coefficient matrix [A b] = [[L,U] y]. Since the lower triangular factor L is applied to b as the factorization progresses, the solution x is obtained by solving the upper triangular system U x = y. The lower triangular matrix L is left unpivoted and the array of pivots is not returned.

The data is distributed onto a two-dimensional P-by-Q grid of processes according to the block-cyclic scheme to ensure "good" load balance as well as the scalability of the algorithm. The n-by-n+1 coefficient matrix is first logically partitioned into nb-by-nb blocks, that are cyclically "dealt" onto the P-by-Q process grid. This is done in both dimensions of the matrix.
The right-looking variant has been chosen for the main loop of the LU factorization. This means that at each iteration of the loop a panel of nb columns is factorized, and the trailing submatrix is updated. Note that this computation is thus logically partitioned with the same block size nb that was used for the data distribution.

Panel Factorization

At a given iteration of the main loop, and because of the cartesian property of the distribution scheme, each panel factorization occurs in one column of processes. This particular part of the computation lies on the critical path of the overall algorithm. The user is offered the choice of three (Crout, left- and right-looking) matrix-multiply based recursive variants. The software also allows the user to choose how many sub-panels the current panel should be divided into during the recursion. Furthermore, one can also select at run-time the recursion stopping criterion in terms of the number of columns left to factorize. When this threshold is reached, the sub-panel will then be factorized using one of the three Crout, left- or right-looking matrix-vector based variants. Finally, for each panel column the pivot search, the associated swap and broadcast operation of the pivot row are combined into one single communication step. A binary-exchange (leave-on-all) reduction performs these three operations at once.

Panel Broadcast

Once the panel factorization has been computed, this panel of columns is broadcast to the other process columns. There are many possible broadcast algorithms and the software currently offers 6 variants to choose from. These variants are described below assuming that process 0 is the source of the broadcast for convenience. "->" means "sends to".
  • Increasing-ring: 0 -> 1; 1 -> 2; 2 -> 3 and so on. This algorithm is the classic one; it has the caveat that process 1 has to send a message.
  • Increasing-ring (modified): 0 -> 1; 0 -> 2; 2 -> 3 and so on. Process 0 sends two messages and process 1 only receives one message. This algorithm is almost always better, if not the best.
  • Increasing-2-ring: The Q processes are divided into two parts: 0 -> 1 and 0 -> Q/2; Then processes 1 and Q/2 act as sources of two rings: 1 -> 2, Q/2 -> Q/2+1; 2 -> 3, Q/2+1 -> Q/2+2 and so on. This algorithm has the advantage of reducing the time by which the last process will receive the panel at the cost of process 0 sending 2 messages.
  • Increasing-2-ring (modified): As one may expect, first 0 -> 1, then the Q-1 processes left are divided into two equal parts: 0 -> 2 and 0 -> Q/2; Processes 2 and Q/2 act then as sources of two rings: 2 -> 3, Q/2 -> Q/2+1; 3 -> 4, Q/2+1 -> Q/2+2 and so on. This algorithm is probably the most serious competitor to the increasing ring modified variant.
  • Long (bandwidth reducing): as opposed to the previous variants, this algorithm and its follower synchronize all processes involved in the operation. The message is chopped into Q equal pieces that are scattered across the Q processes.
    The pieces are then rolled in Q-1 steps. The scatter phase uses a binary tree and the rolling phase exclusively uses mutual message exchanges. In odd steps 0 <-> 1, 2 <-> 3, 4 <-> 5 and so on; in even steps Q-1 <-> 0, 1 <-> 2, 3 <-> 4, 5 <-> 6 and so on.
    More messages are exchanged, however the total volume of communication is independent of Q, making this algorithm particularly suitable for large messages. This algorithm becomes competitive when the nodes are "very fast" and the network (comparatively) "very slow".

  • Long (bandwidth reducing modified): same as above, except that 0 -> 1 first, and then the Long variant is used on processes 0,2,3,4 .. Q-1.

The ring variants are distinguished by a probe mechanism that activates them. In other words, a process involved in the broadcast and different from the source asynchronously probes for the message to receive. When the message is available the broadcast proceeds, and otherwise the function returns. This allows the broadcast operation to be interleaved with the update phase, which helps reduce the idle time spent by those processes waiting for the factorized panel. This mechanism is necessary to accommodate various computation/communication performance ratios.


Look-ahead

Once the panel has been broadcast or say during this broadcast operation, the trailing submatrix is updated using the last panel in the look-ahead pipe: as mentioned before, the panel factorization lies on the critical path, which means that when the kth panel has been factorized and then broadcast, the next most urgent task to complete is the factorization and broadcast of the k+1 th panel. This technique is often referred to as "look-ahead" or "send-ahead" in the literature. This package allows the selection of various "depths" of look-ahead. By convention, a depth of zero corresponds to no look-ahead, in which case the trailing submatrix is updated by the panel currently broadcast. Look-ahead consumes some extra memory to essentially keep all the panels of columns currently in the look-ahead pipe. A look-ahead of depth 1 (maybe 2) is likely to achieve the best performance gain.


Update

The update of the trailing submatrix by the last panel in the look-ahead pipe is made of two phases. First, the pivots must be applied to form the current row panel U. U should then be solved by the upper triangle of the column panel. U finally needs to be broadcast to each process row so that the local rank-nb update can take place. We choose to combine the swapping and broadcast of U at the cost of replicating the solve. Two algorithms are available for this communication operation.
  • Binary-exchange: this is a modified variant of the binary-exchange (leave on all) reduction operation. Every process column performs the same operation. The algorithm essentially works as follows. It pretends to reduce the row panel U, but at the beginning the only valid copy is owned by the current process row. The other process rows will contribute rows of A they own that should be copied in U and replace them with rows that were originally in the current process row. The complete operation is performed in log(P) steps. For the sake of simplicity, let us assume that P is a power of two. At step k, process row p exchanges a message with process row p+2^k. There are essentially two cases. First, one of those two process rows has received U in a previous step. The exchange occurs. One process swaps its local rows of A into U. Both processes copy in U remote rows of A. Second, none of those process rows has received U, the exchange occurs, and both processes simply add those remote rows to the list they have accumulated so far. At each step, a message of the size of U is exchanged by at least one pair of process rows.

  • Long: this is a bandwidth reducing variant accomplishing the same task. The row panel is first spread (using a tree) among the process rows with respect to the pivot array. This is a scatter (V variant for MPI users). Locally, every process row then swaps these rows with the rows of A it owns and that belong to U. These buffers are then rolled (P-1 steps) to finish the broadcast of U. Every process row permutes U and proceeds with the computational part of the update. A couple of notes: process rows are logarithmically sorted before spreading, so that processes receiving the largest number of rows are first in the tree. This makes the communication volume optimal for this phase. Finally, before rolling and after the local swap, an equilibration phase occurs during which the local pieces of U are uniformly spread across the process rows. A tree-based algorithm is used. This operation is necessary to keep the rolling phase optimal even when the pivot rows are not equally distributed in process rows. This algorithm has a complexity in terms of communication volume that solely depends on the size of U. In particular, the number of process rows only impacts the number of messages exchanged. It will thus outperform the previous variant for large problems on large machine configurations.

The user can select any of the two variants above. In addition, a mix is possible as well. The "binary-exchange" algorithm will be used when U contains at most a certain number of columns. Choosing at least the block size nb as the threshold value is clearly recommended when look-ahead is on.


Backward Substitution

The factorization has now ended; the back-substitution remains to be done. For this, we choose a look-ahead of depth one variant. The right-hand-side is forwarded in process rows in a decreasing-ring fashion, so that we solve Q * nb entries at a time. At each step, this shrinking piece of the right-hand-side is updated. The process just above the one owning the current diagonal block of the matrix A updates first its last nb piece of x, forwards it to the previous process column, then broadcasts it in the process column in a decreasing-ring fashion as well. The solution is then updated and sent to the previous process column. The solution of the linear system is left replicated in every process row.


Checking the Solution

To verify the result obtained, the input matrix and right-hand side are regenerated. The normwise backward error (see formula below) is then computed. A solution is considered as "numerically correct" when this quantity is less than a threshold value of the order of 1.0. In the expression below, eps is the relative (distributed-memory) machine precision.
  • || Ax - b ||_oo / ( eps * ( || A ||_oo * || x ||_oo + || b ||_oo ) * n )

[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/aprunner.gif0000644000000000000000000001310011256503657013256 00000000000000GIF87aå÷ÿÿÿ!ù,å@ÿH° Áƒ*\Ȱ¡Ã‡#JœH±¢Å‹3jÜȱ£Ç‹>ŠI²¤É“(Sª´ e€Cºt¹²¦Í›8sêÜY&Mš/ ÂäI´cK™‘-Ê´©ÓƒGUúéÓ« B”¹«×¢GƒžT ”êÔªhEÎ„Š¶kD®b†Å¨UhV¹_óÞ¬7i\©µÖõÙ6íÛKçöä‹wá`»ݲ•ÜX¯e“”{eœ0±_Í wÍçŽy–6qçä›÷ßxúíxͤ˜_¤I_U®ÅžmõÑ5 l¯AÅÚ_û)×Ún«}v`r¾Æu~ˆ¡fv† l…y¶ Nã¡È›kÈ=f×|³5g…èÝØ 0ŽX“ô‰wãŠDµšiâ¤ykíV¢†"%wé)HX“0næ^~Evyb[ÚÅÜú7¢oMú¦“ˆªÅcN×y9Rœ‡™nwV$šÝØ_„tÒ™€~)`p-6¤ pn)§a‚5ÈRhÅÍ(Qœ|éi!“÷™yç~6î9)\õYµ¤e޶we™Š~Z£•Žÿ™ŸYô‰H«T²ê|„F™ b©~¬W–‡žšuîZÝulèt¤2W¦i (©J+Ü©‚¶b±8Z«%·¸wè”6Á÷ëªAz[|¶¦ÉãYlÛ­»z] )–IjÔߌ°^Õ«|±b+í½<œn»‹êÊptÒ¡«ayö®„n•#ŒjÅþ'#©dú¬‹ JŒZÈׯq¿v vžŽqæbÆ‘}«ol®¦¬ó{êÒœ›–‡Nœ²¥qJÒOExÊ;7íÔÊÁA=iÒï8t¯CSL®Ó\wÝ£‚W½kµcK}´Ù^§XÁÇÝJmh«-÷Üt×m÷Ýxç­w¾n ½÷߀Å/žÿn8XêtÜsŸ*ã‡ß .…5B5¯s†6H‘ Û§âñ–hnH›Ú÷¹q¿¹,wö9Æ´ÓÙgyç—†kuŽÉbת²l×Ü2c©Õ^«{Û¢èãì´¿Wt̪þðÏQ/»nf©ggg›9»\m³Éûóè~f=‰K/#õø…v±ë¼›鉢Ǫp÷ÍÏpâ¦é–‘æ`Ì”:—E0(ùl]ô›SïBT0^MÈhm3™²þ•‘ÏŽyØëT ¨$’,ÐcÝë.(!ê€þ¡Øênö6B™É~WÉÈò2‚‰m>¡È ´¼Wíd"P¦®é^XCÞÿðÀö¶Z‘©b0Ôaû6تɹpôððõ´+åg û•Å¢(ÅJO9·ËÚ½HAIE,hTtV}7-&) fó›Ó˜µ; /Q?ÄWo^ö">ÏäaW²Þ(Qdr{ày¬øª\µh=‹×ôLXÈ¥eŠŒ=B™Šºʼü ŽN<$¥°tU£*͕Ǔ®“)hc£àvyS£:õ©PªT§JÕªZõªXÍê/+¤Õ®î D£óªX#ù/$õ¬;+«¥èÕÿ¢%e¥ ± :η¦Œ•):fEíH»2h’Ùô#\çhJx®Ž­~Ej+)IÈ€Š¯¯ÐŒ¦`YšØAyiƒ`5EÃoQ™%a'#Ôë½´²D…¨òÙ˜U]6€Ö%¬U¾ˆÚ’¤âB­òJ5¦kmQ7L,-Τ†¾Tnn– ©*EÓ«Ö‡ëO+ÚNt|®Üß<ÇE>®.qInÝ'&í¥P46Ò Z«NÕÇ2æé+Úò®n‘©¼ì•‚÷[’˜¥š°î.¹Å…å.TËù"òŸ‹ä£qÑìâVa¬ojù³ß0ùj´f/¨fË3€ÍË¥Îd.o&;a «ïÿI?}lvÂK?LÉŽ«ÍsÍš¡ÆV½Ñó!rRDžÔä™Ê½1j¡©SÞP¾Ø •’¼³-lÝ&[&Ÿa Iå±2Žl54¯ 4?>¶„#ëŽ.ËF­˜z8}e2d[¬WÎq§£±‰ŸYã™NPº¦Ѹe?}±“ Ü&CO,³/Ñ(ŽŸ¬a•yF¯˜}k£­àW4Gš™I¬²á…Båbª…Ë,—æÈfÄên§¹•¤ÒvA h«*³oDî`v­4×åfs¨J+õ([IY¡ÆoÆnÖª‹©kDìªY¶F³µŒåGÀJþy‡åÔ“Ÿ@¼¹Är¹ÿûÚ~÷¤žS¹‹¼ÿ  Ó|¶·b–¾ÄÎ-xô Ù(…§ÏamÄb»uhÇÅ=åÎ8T'öÕÎåšTTo$ÓN¤ßÁìC±(pJ;Ì¥d¬yõx!@:<“µqÁe9š~x‚¨Së¬ :ñ¹z÷ÒtÄë ;7Õ—)¯˜-;çéÖ¹Á‹ä–króñø|¶#øÆ{¸GI&y›¾¹¥VÛfõl—†A¦˜½É¹w¾¥{ħ­õ,ñnv¸c7ÞÃëgp9JLáC‡ä]·î1F'ýŠ›:09Ñéö¯•Äg^ðÑñCëµKÎâ“36l·ÅggËßüz(ßM¢«ËüÛ_8˜Óî¦n˜¢z—|1§P\,ª«ÒA<5”±\ÞUn¨^\üåÿ¸=£·Ù¶8âÓVXÎùøƒ±Ó¨9 
êî´ÿ¶çÔ°¦§hÀÏ¿{ÒäÌ(°¥==ê«ïçÆ¼ÛK÷~‰Ô~My~—AR·ÂéÝkô+í)¡™tÕþÞX´–£ãx;æd—/o[1îɾ€%3d¼Ì=þ]ÛþÑEEð·&'wN„V{ý•Bj[ƒ2Pä6yÜçj{hã6Q!rI­ö-Lã~•€Ô¶PBgwí_.G|ÏÇRY³³fØmÔo¦ÕK¾4~6l$˜a=1iœt^*4cGw!%WžsZr¦ƒÕPýVƒ(çYC‡wþ¤„Ô$%w˜n÷e„ô6+=SKRKþ¥1(D+ðµT*$FX‡”b–V€¨aSujfØs/p·F†ûfƒxc2S7+*‚TXn,F<4w2%{FÁ‡ƒ@Bdƒ‚؈R¤)’X‰–x‰˜˜‰š¸‰œØ‰žø‰ Š¢8ŠÚFЦhM]wŠªxb‹3«hŠÿvn¯8‹ëFK‘H‹ó•8ꆋ¼X]ÊÖ‹¯Kþu‹zózÀHa®5SÕLžÿwŒõ'@ËEŒ\SLcèŒÈ¨T­ø~O’JŒØˆàsKaxp®x…Öh0X$*'ån²XŽ5CkÏ•ƒü±gù—rì85|WG2ˆ…”EÃ¥8áX.Á¶‰ççpAÈ€ãÈg`v†DDtÆ7ˆaÄMú¡p@Æm冈œƒ9†A3ˆCœ(O~xOù†ßµ™#HÜga®fÐw©çïDeSL81s”TУy‰‚Î×xvgt–X<Àu[[äZ»×:Ç%YëÕ} EEŽÝX8}Gp ø_m’Œ å<ñ7]‹Ö^ä8oA6x’È… ØuZ‰D!¸))õ;M†^_)Z†M—¨d÷`¥ÿçGsO)dp8*÷q€÷ãiÛX‰ï‡`ô—Vùuð¨€R&=òè^ŒÒw·ö…;©„˜Ô/ù_{)BÀ"]`™˜uéC·£eFö}¦Ù@I˜™Érù…P¯i^¦&™N™Øi'ˆx^i—§bš•mÉÅl›ii~ÙCÂ^ ‡a!É!*Wbo¬³~m÷„¹›g˜“–8VqË£“Yv…?•É#— £0Dƒdܲd+¹…lå‚©³lŒÔhYŒÅ§tˆT5DmÅ“O82bùb´':JyŠLšº ¿v©ˆwŸZIgûI~Lulb“¡‰#‰‡ž…c¹ø"„)¹˜¬éu$z[¦y¤I…|ux¹æù•W9¡|:íiÖY£Üe‹Õx¹(hüùc*¥˜G’›ÜY‹ß„:b¨RŽ´+&‡£—hÙQþ9¡Ö‡&<Ù]ÄÕŸ!GrzõƒÓÉŸæ–9E–¢z§|2zÏ•{í4‚–Å'ªùž‘G–Fuež\Ý4sHªœ™Åj+V)iÑ^”ˆiE žWŽh€%ÅxeºˆÉÄoJ§¨×ÇeÎ Dü…¨Ú8*¸ƒV‚f ‰©¢: šêJÏâ~’&v°s>NB±(:)Œ0ãtù–¼Åk£÷>áW:ä…|͈}qH§w¢ÿöÖNoª`‡FR£–gû™«½j„QfœÓ¹$W€¹yOX©³¨KÚžTú¹:ša¥k/¥¨#äb4*m‰©sæ LŒIg…¦xàuìÙ:€bš,$žp•¥Ê¦žYe¥…Êe9ÅZÈf“hÚ©¥X²µqàµeÉOά ©¯’d ôRÙ·=xTDߥ‹{eæƒ(ÆŠ”Î…næ“ív“Ш¤%ú¢ÚÚ’øccñD0ÿ±Z¡©,蘄rÔH¬µšS“ém-mja+h¤á¹ŒŸY[ugsè(cÌ8®/z~túJ÷‹Åz’cª´·›f²G&>Ò SlHfð&Ü:‰üšQMö¯ÿ;jŠØ<9Ê"š$`èVkËf.é”éh¢Äc :FÀ pî2eŠUôBžŠ'0r·d{´·Ý·8ÏYOå·R^õIC(?4Z¬tYÑR¶ÚeŒ˜a_›PïŠ8õ®:rJʈ˜&YœWC`öX±û|›$rÁ÷rPʇ²Z°[+¬ù˜xפ_}êi5©eÏg‹IF,«²Õ]&[µ÷è`¥™_Úª%¡.<ƒ†®¦VTݺ{¤­U»Â•§ÑÈ®›‘>˜p¿#ƒÚ£~j£`%{N<é¸r¨97¹¿5^f$¤j¿r‹¿]£{º¼Ài£œm‘Ù™‚± \º»[ÿ&·u¿ÛºÓX¥»´e;­Þw¤ÓŸïøï%´™®÷›±X×4®G¶Èê…úÙK0vÍ»í†OýAôiš…¥3xVD¿z˜z©Al€ Š¸Œ»-<¿g:~wSt[³ŠäœÄRšÄƒPlO5ºÒDÓ«¼óƒì’œ ¶±Y_Ôâ˜òÒ–ìF¢‰˜|‚vUÜ-FV®žY'Nº¡mÜŽDH2_{Á?Íì€ Òy½ Péë“y£IšÌ{y–Stv¡›¦y¬2ö©hHRð;¤KµCÚ< {³µÌÈ“²_•‚À‡y?¤'Œ_!k‚_ š–f­+餬,•ÿeü,¯û—öçFI³Ûw0îø©b›¶¼¤Áþ,™sW‡Mv8ÅHÌI›ˆ;D»æÄ˜&ßœ0«+XŽd—¿jÒc÷{•ƒtôT湯æGo¾‰Ë¬º†G’ÑmÕ¾E•;õEè§XAM­¿¬fX°fKÏ2Ô¾‹8iýHÓˆó‹pÌm%0ö…°¤›gË®”K½ÖR%ÊåóY´§ñu›Z2 ÷Çq½W/‡¦ªÐF«ÄzŒmð6dŠõ0ášáQÎ~=Ô_c×cÙSX«… Î"œ‚ÁŒ,¨·¤&¾”Têh³Íì<ô÷Åuú#øÚ”zÚºœ²ˆ[@iW”hþéÕ{ýgÿ~+Q-å¿g…<ë³Û"«³‹[»˜ ¼.'ܪ¨HŽ_0ŒXZüÒs窾£XÕ3 
bwiØß[’ð,ľ{®ÎŠK atÈQ»»[Ëu[¡òŒ„¨,ê|ÛûÌ¡L§‡™­ÈÚF‚çÞÎ{vÈT9•ùÉ>C˜]EƒwÓºÕ¹ DÖÔà£êNõˆfØkìÚZͳ7/Û»¾÷-Òi8Ñ üdhH€9ÄZÿ¬[ÊÕøWiˆÄØÑYuœ™ˆŠ´!~C´Z‡ÖaÓìßËŠ(ANÒi[ã´ü8Œ NÛÚ­ ®±uƒoIiäýÁ÷Šá|Zª+žá‰ýi È;ïÚà7³X`*ËÅ%½sŠIÀ渆7 1b›;]‡x4¡ÇÔšá§ù¼|mº,¾àu~å„Î%w„æ…n3‹‹Ýèé’>é”^é–Š;hpcc-1.4.1/hpl/www/copyright.html0000644000000000000000000000562311256503657013646 00000000000000 HPL Copyright and Licensing Terms

HPL Copyright Notice and Licensing Terms

Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
  1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
  2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions, and the following disclaimer in the documentation and/or other materials provided with the distribution.
  3. All advertising materials mentioning features or use of this software must display the following acknowledgement: This product includes software developed at the University of Tennessee, Knoxville, Innovative Computing Laboratory.
  4. The name of the University, the name of the Laboratory, or the names of its contributors may not be used to endorse or promote products derived from this software without specific written permission.

Disclaimer

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS `AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/documentation.html0000644000000000000000000002775511256503657014521 00000000000000 HPL Documentation

HPL Documentation

The HPL software distribution comes with a set of text files explaining how to install, run and tune the software. These files reside in the top level directory and their names are in upper case. To a large extent, this page reproduces them. In addition, man- and HTML-pages are provided for every routine in the package. To access the man pages, one must add hpl/man to one's MANPATH environment variable. The HTML pages can be accessed on this site, or by pointing your browser to your local hpl/www directory. Finally, the source code has been heavily documented. Despite all the other documentation efforts, the source code remains the most trustworthy and truthful piece of information about what goes on in HPL.

HPL Functions HTML Pages

Computational Kernels Wrappers When calling the Fortran 77 BLAS interface, these C functions allow the C-to-Fortran 77 interface issues to be confined to a small subset of routines.

Local Auxiliaries Basic functionality, local swap functions.

Parallel Auxiliaries Index computations, parallel basic functionality.

Grid Management Most of these routines have a direct MPI equivalent. On new systems, when the entire MPI functionality is not yet readily available, these functions are particularly convenient since they rely on a minimal subset of the MPI standard.

Panel Management

Panel Factorization Recursive (matrix-multiply based) and (matrix-vector based) panel factorization.

Panel Broadcast

Update

Main Factorization / Look-ahead

Backward Substitution

Matrix generation A C version of the ScaLAPACK random matrix generator with less functionality though.

Timers Sequential and parallel timing utilities.

Main Testing / Timing Driver

[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/errata.html0000644000000000000000000000770411256503657013116 00000000000000 HPL Errata-Bugs

HPL Errata - Bugs

Issues fixed in Version 2.0, September 10th, 2008

Gregory Bauer found a problem size corresponding to the periodicity of the pseudo-random matrix generator used in the HPL timing program. This causes the LU factorization to detect the singularity of the input matrix as it should have.

A problem size of 2^17 = 131072 causes columns 14 modulo 2^14 (i.e. 16384) (starting from 0) to be bitwise identical on a homogeneous platform. Every problem size being a power of 2 and larger than 2^15 will feature a similar problem if one searches far enough in the columns of the square input matrix.

The pseudo-random generator uses the linear congruential algorithm: X(n+1) = (a * X(n) + c) mod m as described in the Art of Computer Programming, Knuth 1973, Vol. 2. In the HPL case, m is set to 2^31.

It is very important to realize that this issue is a problem of the testing part of the HPL software. The numerical properties of the algorithms used in the factorization and the solve should not be questioned because of this. In fact, this is just the opposite: the factorization demonstrated the weakness of the testing part of the software by detecting the singularity of the input matrix.

This issue of the testing program is not easy to fix. This pseudo-random generator has very useful properties despite this. It is thus currently recommended that HPL users wishing to test matrices of size larger than 2^15 not use powers of two.

This issue has been fixed by changing the pseudo-random matrix generator. Now the periodicity of the generator is 2^64.

Issues fixed in Version 1.0b, December 15th, 2004

When the matrix size is such that one needs more than 16 GB per MPI rank, the intermediate calculation (mat.ld+1) * mat.nq in HPL_pdtest.c ends up overflowing because it is done using 32-bit arithmetic. This issue has been fixed by typecasting to size_t; Thanks to John Baron.

Issues fixed in Version 1.0a, January 20th, 2004

The MPI process grid numbering scheme now defaults to row-major ordering. This option can now be selected at run time.

The inlined assembly timer routine that was causing the compilation to fail when using gcc version 3.3 and above has been removed from the package.

Various building problems on the T3E have been fixed; Thanks to Edward Anderson.

Issues fixed in Version 1.0, September 27th, 2000

Due to a couple of errors spotted in the VSIPL port of the software, the distribution contained in the tar file of September 9th, 2000 had been updated on September 27th, 2000 with a corrected distribution. These problems did not affect the BLAS version of the software in any way. If you are using the VSIPL port of HPL, and want to make sure you are indeed using the latest corrected version, please check the date contained in the file HPL.build.log contained in the main directory.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/faqs.html0000644000000000000000000001336711256503657012574 00000000000000 HPL Frequently Asked Questions

HPL Frequently Asked Questions


What problem size N should I run ?

In order to find out the best performance of your system, the largest problem size fitting in memory is what you should aim for. The amount of memory used by HPL is essentially the size of the coefficient matrix. So for example, if you have 4 nodes with 256 MB of memory on each, this corresponds to 1 GB total, i.e., 125 M double precision (8 bytes) elements. The square root of that number is 11585. One definitely needs to leave some memory for the OS as well as for other things, so a problem size of 10000 is likely to fit. As a rule of thumb, 80 % of the total amount of memory is a good guess. If the problem size you pick is too large, swapping will occur, and the performance will drop. If multiple processes are spawned on each node (say you have 2 processors per node), what counts is the amount of memory available to each process.


What block size NB should I use ?

HPL uses the block size NB for the data distribution as well as for the computational granularity. From a data distribution point of view, the smaller NB, the better the load balance. You definitely want to stay away from very large values of NB. From a computation point of view, too small a value of NB may limit the computational performance by a large factor because almost no data reuse will occur in the highest level of the memory hierarchy. The number of messages will also increase. Efficient matrix-multiply routines are often internally blocked. Small multiples of this blocking factor are likely to be good block sizes for HPL. The bottom line is that "good" block sizes are almost always in the [32 .. 256] interval. The best values depend on the computation / communication performance ratio of your system. To a much lesser extent, the problem size matters as well. Say for example, you empirically found that 44 was a good block size with respect to performance. 88 or 132 are likely to give slightly better results for large problem sizes because of a slightly higher flop rate.


What process grid ratio P x Q should I use ?

This depends on the physical interconnection network you have. Assuming a mesh or a switch HPL "likes" a 1:k ratio with k in [1..3]. In other words, P and Q should be approximately equal, with Q slightly larger than P. Examples: 2 x 2, 2 x 4, 2 x 5, 3 x 4, 4 x 4, 4 x 6, 5 x 6, 4 x 8 ... If you are running on a simple Ethernet network, there is only one wire through which all the messages are exchanged. On such a network, the performance and scalability of HPL is strongly limited and very flat process grids are likely to be the best choices: 1 x 4, 1 x 8, 2 x 4 ...


What about the one processor case ?

HPL has been designed to perform well for large problem sizes on hundreds of nodes and more. The software works on one node and for large problem sizes, one can usually achieve pretty good performance on a single processor as well. For small problem sizes however, the overhead due to message-passing, local indexing and so on can be significant.


Why so many options in HPL.dat ?

There are quite a few reasons. First off, these options are useful to determine what matters and what does not on your system. Second, HPL is often used in the context of early evaluation of new systems. In such a case, everything is usually not quite working right, and it is convenient to be able to vary these parameters without recompiling. Finally, every system has its own peculiarities and one is likely to want to empirically determine the best set of parameters. In any case, one can always follow the advice provided in the tuning section of this document and not worry about the complexity of the input file.


Can HPL be Outperformed ?

Certainly. There is always room for performance improvements. Specific knowledge about a particular system is always a source of performance gains. Even from a generic point of view, better algorithms or more efficient formulation of the classic ones are potential winners.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/index.html0000644000000000000000000001143211256503657012740 00000000000000 HPL - A Portable Implementation of the High-Performance Linpack Benchmark for Distributed-Memory Computers

HPL - A Portable Implementation of the High-Performance Linpack Benchmark for Distributed-Memory Computers

ICL - UTK Computer Science Department
Version 2.0 A. Petitet, R. C. Whaley, J. Dongarra, A. Cleary September 10, 2008 # Accesses


HPL is a software package that solves a (random) dense linear system in double precision (64 bits) arithmetic on distributed-memory computers. It can thus be regarded as a portable as well as freely available implementation of the High Performance Computing Linpack Benchmark.

The algorithm used by HPL can be summarized by the following keywords: Two-dimensional block-cyclic data distribution - Right-looking variant of the LU factorization with row partial pivoting featuring multiple look-ahead depths - Recursive panel factorization with pivot search and column broadcast combined - Various virtual panel broadcast topologies - bandwidth reducing swap-broadcast algorithm - backward substitution with look-ahead of depth 1.

The HPL package provides a testing and timing program to quantify the accuracy of the obtained solution as well as the time it took to compute it. The best performance achievable by this software on your system depends on a large variety of factors. Nonetheless, with some restrictive assumptions on the interconnection network, the algorithm described here and its attached implementation are scalable in the sense that their parallel efficiency is maintained constant with respect to the per processor memory usage.

The HPL software package requires the availability on your system of an implementation of the Message Passing Interface MPI (1.1 compliant). An implementation of either the Basic Linear Algebra Subprograms BLAS or the Vector Signal Image Processing Library VSIPL is also needed. Machine-specific as well as generic implementations of MPI, the BLAS and VSIPL are available for a large variety of systems.

Acknowledgements: This work was supported in part by a grant from the Department of Energy's Lawrence Livermore National Laboratory and Los Alamos National Laboratory as part of the ASCI Projects contract numbers B503962 and 12187-001-00 4R.
[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

Innovative Computing Laboratory
last revised September 10, 2008
#########################################################################

file    hpl-2.0.tar.gz
for     HPL - A Portable Implementation of the High-Performance Linpack
,       Benchmark for Distributed-Memory Computers 
by      Antoine Petitet, Clint Whaley, Jack Dongarra, Andy Cleary

#########################################################################
hpcc-1.4.1/hpl/www/links.html0000644000000000000000000000764511256503657012764 00000000000000 HPL Related Links

HPL Related Links

The list of links below contains some material relevant to this work. This list is provided for illustrative purposes, and should be regarded as an initial starting point for the interested reader. This list is by no means meant to be exhaustive.

Message Passing Interface (MPI)

MPI is a library specification for message-passing, proposed as a standard by a broadly based committee of vendors, implementors, and users. Machine-specific (optimized) as well as freely available MPI libraries are available for a large variety of systems. Browse the Message Passing Interface (MPI) standard web page for more information.

Basic Linear Algebra Subroutines (BLAS)

The BLAS are high quality "building block" routines for performing basic vector and matrix operations. A lot of "BLAS-related" information can be found at this site. In particular, a reference implementation is available. This reference implementation is not optimized for any system, and it is therefore not recommended to use it for benchmarking purposes. However, machine-specific optimized BLAS libraries are available for a variety of computer systems. For further details, please contact your local vendor representative. Alternatively, one may also consider using automatic code generators such as ATLAS. This tool automatically generates a complete and optimized BLAS library for a large variety of modern systems.

Vector Signal Image Processing Library (VSIPL)

VSIPL is an API defined by an open standard comprised of embedded signal and image processing hardware and software vendors, academia, users, and government labs. A lot of "VSIPL-related" information can be found at this site. In particular, a reference implementation is available. Machine-specific optimized VSIPL libraries are available for a variety of computer systems. For further details, please contact your local vendor representative.

TOP 500 List

The TOP 500 is an ordered list of the 500 most powerful computer systems worldwide. Computers are ranked in this list by their performance on the LINPACK Benchmark.

Parallel Dense Linear Algebra Software Libraries

Browse the Netlib software repository or the National HPCC Software Exchange to find a large collection of freely available linear algebra libraries.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/main.jpg0000644000000000000000000002105111256503657012367 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀÃÎ"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú(¯—>"øjûÄßüM…¤—†ÊÞ É­a“d³D±@¬±¬7áò8=8ê:+Áü1ð_áß‹´8u}#Y×%·“†Vžñ8êŽ<¾d~`‚AìÃ8ø?þ‚Zçýÿ‡ÿP°Q^?ÿ ãàÿú kŸ÷þþ5Gü3ƒÿè%®ßøøÕ{ãÿðÎ>ÿ –¹ÿáÿãTÃ8ø?þ‚Zçýÿ‡ÿP°Q^?ÿ ãàÿú kŸ÷þþ5Gü3ƒÿè%®ßøøÕ{ãÿðÎ>ÿ –¹ÿáÿãTÃ8ø?þ‚Zçýÿ‡ÿP°Q^?ÿ ãàÿú kŸ÷þþ5Gü3ƒÿè%®ßøøÕ{ãÿðÎ>ÿ –¹ÿáÿãTÃ8ø?þ‚Zçýÿ‡ÿP°Q^?ÿ ãàÿú kŸ÷þþ5Gü3ƒÿè%®ßøøÕ{ãÿðÎ>ÿ –¹ÿáÿãTÃ8ø?þ‚Zçýÿ‡ÿP°Q^?ÿ ãàÿú kŸ÷þþ5Gü3ƒÿè%®ßøøÕ{ãÿðÎ>ÿ –¹ÿáÿãTÃ8ø?þ‚Zçýÿ‡ÿP°Q_>i~ Ó|ûDxgKÒ绚 l¥¸fºuf c¸\ ª£AÛÖ¾ƒ ¼Ãßòt>,ÿ°Tú ­{xÿ‡¿äè|Yÿ`¨ÿôZØñ?†5O k“x×ÁPy·s«èËÂj9.€t˜dž9$K,‡† gÄz¡\ñº$·@wôWÿ ›À%Šÿn>àÁûÆCƒý_^Ô¿ð¹<ÿA©>ÿ—ÿ?{û¿êúûT]©ÍôgwEpCãOÃö®¼Äà‹ŽÃ'þYöÒ¯Æo¸ÊëŽFÒùtpOú¾žô] BOdw”Wß¼ƒ-®8CäØ\}ÓÀ?êú{Ò?Æs¯7 ÿ Üp0Õôä~t]„–èïh®þ'€ð§ûjL3Sö žIý_Ri?ásx0_íÇÜX `¸ÉaÔ«ëÈãÞ‹¡ºs]ÞQ\¼)Q¸îX¡l.Häã÷tŒ¾`„krù(E…ÏÍŽ¸ýß8¢è9½RÕ¿Í}èï(®üiø~3y¸PÇýã€qƒþ¯§#ó¥ÿ…Íà¬ßÛµ,~Áq€Lþï½Bä—c›ñü„ÿì'þƒu^Á^Þ$Ò|QûJxbóG¹ià‹O’f…â*â;†ÆAèÊzw¯u¦&šva^?áïù:Ø*?ýÖ½‚¼Ãßòt>,ÿ°Tú ­=‚Š( Š( 6•è ßÿ­Ò¬FÙNG^8áO®;ŸAH¨¾XÈQÀcÜ/¹õ'°§™š0Ùþ ÝOÓßkéð˜j”_´Šº¶©^é_~—·m÷µ¤±”“ÐGÎÑŒ›¹Ôú·ò¨‡ðãŽp1Î>ž§üý,.Hª’>èUÿÐÄÓB/¿„mþ/aè=Mo_ )ÉJ-4ÒïåÛôò²O‘)R·õý_1GEÀ‚:è=O©¤B|ç ®6|ÙS±‡)ô>¸9DûTó×#é‘ýùÿ5·D7nÍ¿2Ìê2ÑŽæÄ;ô&£<ÃÊž ɵºþ»}ÚÛov×èÀ®|Db¿¯ëîïaÑmüÈB`‰[Cü}:m~ŸwÛmù8ówüøP²yß/˜½’_î¸þè}h··‰'\¬i±A%Õ¾¹ûÑ·p~îIÈäÕï³ÅUòü¤òÎâ[ÊSØçïÆqß§=9Ûð5*+ÿ_×õé¸Â`çìÞ«ÓÓ×~Í;'5|‚e ïi7€]>Y7v;¾AÅÛŸš­ÙÊ»YC«@>el º rÆäëß q¶‰¤(b]Û‚y1ÿ,Ë{¸þƒйi RDGÍ.éwbQ™‡bsòÊ0yãpüHº•/õý~Ÿ#Ÿ ƒ©íw_×ã¾Wþõ¯Fàþâ2Œ±©fe7"œÌœsþ%ê=;ÖÂóû95´,|ÃÜùe£ŽL(êFI^Hã®í;›t1†XÞMÒy™Š3,iW¸ã8<ÅQ÷X>^™î)Òškúþ¿¯SBI 
¨$ôÑú`ž*÷°‘š@«‰FÖaÎÒœ7Fê xÞÓì÷ÚLw ²h4K8eߢýÐÃé_gÆÁ5x·§—o7Úß+_Oz\ò‹&—•$à ÝéêGòçvî2C6îñoAM‘™y.A » #ëïè*≠˵·€qõõ?çèñXè:Žñ‡ŸŸ›ÞÏ{õpQ‹±aã*Í$|6rÊOÓñ©,ÊÏrvî+ò+±]Íp­ü-þ×sõ¨ràæ”àãØqË{Õ+N»¿¹¹KHþÐ$ÑG€ÒÆCÊžy8íÆOÃœÎøyF”\T·NÛÝ;é羉]nŸºt`çu£9ê—õýoèÍ[f-q¹7·™!eÀÚK€r¬:$€gÐ0ëÖ®Žq³æË—.ãÎqŸºãœƒÁçŽ[úO…ôMnÄ_Ùjº‹¤ªòцR½›äûÃŽzô ã´[ÀÖoænÔuÞcv C‘ÐýÎNzð=|L°“ouý_×O°¡ÄjqiÆ_‡ùÿŸÎϟψãä€Ì%Ü›«=³÷Ó·b­Ú–ä8UUMŒs½Bž‡ý¨lò¼Œã8í?á_éâo4jŠ¿™æeZ1ócÀNýý{ÒÅà+6ùZ† Y™@h°3Ô“íÒªXiµºþ¿¯ë®43¼=9ݧo——Ÿá{é»vkоÈe”ªßÍËþîãÝgêÔRO›Íó>]Ê"—Îçcv㪟ásï^…ÿ%ŽÕ_í ýªùXÁê?ÕôöéÀôü>Ó“ËÅþ¡ò!Œe£9Sü')Èö=(Žioý_×r¾w‡©+¨¿¹Ÿ›í×k®^M7”J¿˜!]¾bÀ`€ßßü0õ‘Väÿ¤Fc,Ì • ¬ÃøŠg£âCÁë€I®äxÅbò†£¨„Ú«Ññ·¡'zõàz I|a>ï3PÔ fVÈh :„àûŽjc„šwºþ¿¯ë¦Õxƒ:|ª2¿Ëüý:|´|Ü¡U𠬥Ai"1®C)˜Çb?‰9õ5xª€mÂ)#o#aþ$õ^™SÓŒt\õËà+‘Ýu @38á¢7¨ù8'¾:÷§ÿ gÿA@|þ`Á‹†õ'ç>¹>¦‰afÝî…C>ÂÓ+Œ¾ååçä¾å»W~y'ÍæùŸ.åKçs±» qÕOðÈ9÷«6ß;L‰b:Ë÷•‡EvøëÇ${Uø}§'—‹ýCäCËFr§øNS‘ìzS£ð„Q„]CPÚËÃ4M•ô9NG¦zdÕË 6¬¿¯ëúóÆ–w‡Œù¤Ÿáçæ»¿ÇMR_†¿7Æ ¿rnƒg† -Û!€ã=9F;OÔuó§„´8to‹Þò®n'2-Ú–œ© -»c ž§®kèºé£(³ÄÌq0ÄâeZ;o¾È+Çü=ÿ'CâÏûGÿ Ú×°Wø{þN‡ÅŸö ÿAµ­#Ø(¢Š(¢Šø%c%Wý¡“¸ò@õô_çü¦ÆåãqÀ'¾;ŸAô¨Ñ—Ê1ÆC§ßØve Ù8$~d_åüþÛ/ÃÒm4úwôôëøë§Æù¦ßõý_ ˆUxm¸<§'ù·ò¨ü²¹ÁiÀÇ8>ƒÕ¿ÏÒðؤ6ÐsŒsß§ß·?Œa€Ç!xãoðEõ>¦ž+GÚhÿøo.Û-­înß×õý}ïòÏm£hÇaè=ýMuŸ ÔNNÜ›6ä¿Aéé\¾áê¿wtäûšå¼±ÎKrwõ>çúò:¿‡ ÅæÏØß ÿ¿~çÖ´Î0ó†É÷_×ë»ôoÞ6¹S¢Š+äNƒ×lo,®×^Ñ­Ö[´]·vãƒuÿ1ÁëÛžêi:µžµ§Ç{e&ø›‚ ÝXv#ÿ¯ÐÕêåµm&óIÔ$×´÷ÊÜÞØŽå¼¾Ž9úýI ÔÑTtZÏZÓã½²“|MÁ†Fî¬;ÿ×èjõQEQEQEVÒ¿ä¯x/þß¿ôœ×»×„i_òW¼ÿoßúNkÝè¯ð÷ü‹?ìþƒk^Á^?áïù:Ø*?ýÖ€=‚Š( Š( ‚’A±zçî‚£§²ûúšv´`ƒÀ=‡©ô¯óª9¯+Û>¿Aïõ÷Ä©/\pXã€sëì+ëp8ª‰ü¿Ë½úo嫼lŒ%z7о x[Ũ_Üøn«yñµi~F ± P¤.Žø÷¯9óÎA9Žîþííè(”ðKpK;†HôÏ¿µDxÎxÁËgœ}}OùõÆU'ì%ÉOeÞï¿ví×NŠéè¤ _rç˜9Ḏzƒê}ýùgÓŸÜ»‹Gì>xÿ_Z㱌î¶O9ÃÞíïں߆£(¸Æ1ö&ŽŸ}:{õë\ß9áœ_—õþ}.µ³²Jš\Ç«QEòG@QEËjÚM擨I¯h1½±-Êÿy}sõú’oIÕ¬õ­>;Û)7ÄÜxdnêñý~†¯W;©hW—·‡™aÔ$ù¦·õ7XìGfôaŽùêMtTVv‰«Ç­iÂé"xdV1ÍŸz)ÞSþ{ŽJÑ Š( Š(  ÚWü•ïÿÛ÷þ“š÷zð+þJ÷‚ÿíûÿIÍ{½ãþÿ“¡ñgý‚£ÿÐmkØ+Çü=ÿ'CâÏûGÿ ÚаQEQEðjݨÛÔ†=qݿمN‡(âqüDtÏ|zŸëùÓVm›zdAþgÛГ:JÅnn½qÓ=€îOéšûl¿´µÛúê½|·÷]æsN,’Cµz•ÁÆ$g°õcþ}âéþÎß—åçoû+êǹ¡ÝÂã$ØÂó‚{Üúšˆ; `ôùFßä¿ãþKÅc!í6×Í~›tvä#oëúþ¾û¿—÷xÿÐGõ?äu?¹ñLçå9³s¸uož>~ž•ÈolAþÃ{EÍuŸ ›wŠns›6ç'çNk<ë 
àÜWuÛüÿ­ïgïãi«EWÆ!EPEP5­éú†Ÿ¨EY¶…¼²írƒ¡íÓôî_IÕ¬õ­>;Û)7ÄÜxdnêñý~†¯W-«i7šN¡&½ Ç¾VæöÄp·+ýåôqÏ×êH`¦Š£¤êÖzÖŸí”›ân<27uaØþ¿CW¨¢Š(¶•ÿ%{Áöýÿ¤æ½Þ¼#Jÿ’½à¿û~ÿÒs^ï@xÿ‡¿äè|Yÿ`¨ÿôZö ñÿÉÐø³þÁQÿè6´ìQEQE|¨JÆF;ŒúŸ_§ù6 Ëgž{ŽyǹõíMCûµ;º2£¦{/ûG¹©Ð|vž6ƒ×¾ƒüûý¶]„ƒiÝì¿5Ûîü½åʹ§&@èJ/à)à/ þ¦£ØÇu\ñÆ ôÿäÚ“æ\à6NrNÇsþÈÿ>ñuÿkwÍóq»ý¦ôQØQŠÁÃÚnú[~WZÙ}•"2vQòr9ÛÎ8Èô‹üë¬ønxªçqÏú#òG'çüâ¹ÏûÜõÿxÿAþGSðëþF™ÇÌ1füƒçó=ÏÖ–w‡„0nJû¯ÏúóëÖL);Èõ*(¢¾(é (¢€ (¢€ (¢€9}SG¼Òõ6×4ÃK!ÿM±ÎÕ¹ÞŽ99ïù†ÙÒõ‹=^{Y™Û4.6É u—±È#ð5~¹ÝwD˜]®»¤/T~tvî1Õ߃ôôtTU'V³Ö´øïl¤ßpAá‘»«Äõú½@´¯ù+Þ ÿ·ïý'5îõáWü•ïÿÛ÷þ“š÷z+Çü=ÿ'CâÏûGÿ Ú×°Wø‡áïáfjÞ+𦵥X}º(á|³ì `TÄÊ>hÁâ€=‚ŠñÿøG¾9ÿÐç¡ÿß•ÿäz?áøçÿCž‡ÿ~Wÿ‘èØ(¯ÿ„{ãŸýzýù_þG£þïŽô9è÷åù€>f^TrOxÿº?©ÿ&t ^yÛž;“è?Zõ_øgOãÚZLgÏ—ÿ~©ãöxñˆ ýnrG7>™ýÝ{X\m;Ë·õßþ¾±yÊ-žM'+œƒ“»,9?íoAQû_7þ„ßГë§öwñ“›QÐIÎI3MÉÿ¿Tßøg_ñGB<äæy¹>§÷TWÇR©>eý_Ó{±E¤y_N0F9Áê¿íV=…uß †üAµ×&Õmnü3“®'†9'ò¥<üÌ»>÷=A©ÈÚWü•ïÿÛ÷þ“š÷zò?|=ñ·ŽôMs\¸Ð®çå,žmíæDS£.8î;×®PEPEPEPEPEPEPEPEPEPEPEPEPEPÿÙhpcc-1.4.1/hpl/www/mat2.jpg0000644000000000000000000014761211256503657012322 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' 
",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀ 5"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú(¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ )’˼ÓH±Å–ws…P9$“ÐVgü%>ÿ ö—ÿ‘ÿ'$·f£R¦°‹~ˆÖ¢²á)ð÷ý´¿ü ühÿ„§ÃßôÒÿð2?ñ¥ÏåýV¿ò?¹šÔVRøŸ@s…×4Æ8'î3À'¯¥'ü%>ÿ ö—ÿ‘ÿñîV¯üîfoŒüioáX‡‘ö«ûˆ®&·¶.cWH"2ÊKím¸QÁË2ŽYw4JgF±Õ-ÖE‚öÞ;ˆÖ@u Á#8>¦¸_éþñƒXÏ‹ô»«H®­ÕžXæFŽâ#™]êw‚p1È9ã¤Òu hÚ5Ž—oâ = ²·ŽÞ6’ö"ÅQB‚p@Π£ž=Ãêµÿ‘ýÌ訬¸¼I¡O2Cµ§I,Œ.³ÀɦÿÂSáïúiøøÑÏáõjÛr?¹šÔVOü%>ÿ ö—ÿ‘ÿð”ø{þƒÚ_þGþ4sǸ}V¿ò?¹šÔVOü%>ÿ ö—ÿ‘ÿ]mFÉ‘ï-Õ”àƒ*‚çRêÓ[É}äÊ…XüQkäY¢ªÿiØÏõ·ýý_ñ£ûNÃþ­¿ïêÿ/¬Rþe÷¢}œû2ÕWûNÃþ­¿ïêÿ+j6Hp×–êp ª8##¿¥Þ—ó/¼=œû2Ís~ñ[x²ßV•ôÙ4÷Óõ9´öŠIVF&0¼¼óc°ã‚kkûNÃþ­¿ïêÿrþ ÒSÂÿÛ?jñŸ{ý§¨I¨*/'Ë’O¾9‘ò¼.QÎIÏÖ)2ûÐ{9ögeEUþÓ°ÿŸëoûú¿ãJu% ZòÜRe^FqÇ> Ñíé2ûÃÙϳ,ÑU´ì?çúÛþþ¯øÑý§aÿ?Öß÷õƬRþe÷ ösìËTU_í;ùþ¶ÿ¿«þ4õ¼µh¼Õ¹„Ç»ƒŒgÓ>´ãVœ£$þbp’Õ¢z*·ZÏÔ÷ðQöëOùúƒþþ Ð’z¯5žs5¯ÚÞ0G渪n<.NOLÒýºÓþ~ ÿ¿‚©ê’Ëw¥]C¥êÖÖWŒ˜Žéãr@ݳpóÆN3Œ‚8 ¼%âøIìõ’Ïì—Zv¡6ŸsËæ'™(ØRÊA•SÔcŒž‚°<7nt]:K[½^Æë2—È‹ÉXÔwHìì_{³³fsšØûu§üýAÿOEAöëOùúƒþþ Tº·‘ÂGq±èÁ&€&¢Š('Å?ò(k_õá?þ‹jùоñOüŠ×ýxOÿ¢Ú¾b¯+1ø¢~ÁßÀ«ê¿#§ðgƒæñmÅò$žTvБƒ+#SÜA$€xR8$æ+Ð4ÛËß꺼šf¢~o0 ›ƒâ)EݵӢÇ!,Ìò/Þ>œsëžœVCBÀÉ´HÎ ¨ÈëÇ5õgŒG^[ãÿù®ÿë•¿þˆJõY"’Û,nŒFpü·Ç1´Þ8¸‰J†t¶P]®L1õ'sÅpæÂ^¿æ}W;cfßò?Î$Ú„ãÔ<®k3çÏ·‰ZÕ9 /!^êB²ƒœd?^9ômþËGñŵ‹Éjúh‚K/4ÞÂðIlK1/ÁÁfË•-œ¶:+•Öôµñ,šn‹?ö¬/óÛ5³,ÌëŒBÈÁ§Là^uH.TãÓF}†/o8ÕzIsG²[[ÕZïÕù˜uêzüŠZ7ýp“ÿGË^^bB³ØDÌU\”‘‚@>£püÇ­z†ÿ"–ÿ\$ÿÑò×—˜»Ëåù™çŽôaþ/ÒGI¡iQêrÌ%p¡c;yçqàwü=k)•‘ÙJ²œF5©k(ÓÞÂd*ò_dpUºãåÇ'O¥Mâ8->Ô.í.!Jxˆêv·¯¿Ïë^tððxE(«J_5+5÷v^§ÈƬ•{=žÞ«üÌJÕµÿT_õÞOý*³º@m¦ÌC2 ‡äÏ>œUû_ùEÿ]äÿÐRº²Ê8ÕumŽdÓûwE›+µ]Ç8RFãè?ÏÜŠv£l-/¤‰A œ®GcþqøRÆ«‹>T™ÖE Ž:ò}»V…ù†ûMŠá¥‰n‘yMêK~øúŠû£çLJæ¾ È ?ëþ/ý-uT„!òßpœ}ï§­s>?…V26¿ö„+†;pvK×=+ Oðdz¹üŒ)z˜ðRëžÖµ,n¢‡u”H 3$–Ø:†ÚQNHÎþ Q\Uz6‹«CáïÙ[Ã%»YdzYÆ¡ ˆÛ—f.Çi-™6–'hÀ Vtk oĆê ëkæßVŽŽc·1…CÓ‘·ÁÀÎ 
¯&¥5ìÓŽëF~…„ÅÔúÜáUû³\Ñò[5åµþó•®¿À¿zÿéþÍ\ò身Ãq2é—³2Îâ+^X1ÇÊG|ô®‡À¿zÿéþÍ^~-5BWìtæ3Œ°³åwÛóGy§Z}ºþ( ÚŒÃ{gõõ=¹&±d,59 PDyÜ™îžxõÇLûRÄ‹˜ò’Œe;]DÊ Ž9<Ÿlü¾†µµF·Ôôh.ÚxRö$æ?5 8÷éÏ|{‘Šòiá¡<,£o}Z^«·ëç·CáçVQ¬ŸÙz|ûþ‡5[þÿ¬¿õÀÿèKX©mq'—² [ÌÎÌ!;±×¸­¿ +&±::•e…`ƒ¹jr¨Éc)¶ºÓ¡5ävTQE~Š|±“âŸù5¯úðŸÿEµ|Å_Nø§þE kþ¼'ÿÑm_2ÅM2D¥C;Ü*äú“€¹â¼¬Ãâ‰÷üíB«}×äv^ðRëžÖµ,n¢‡u”H 3$–Ø:†ÚQNHÎþ Q\Uz6‹«CáïÙ[Ã%»YdzYÆ¡ ˆÛ—f.Çi-™6–'hÀ Vtk oĆê ëkæßVŽŽc·1…CÓ‘·ÁÀÎ ®z”׳N;­ìa1u>·8U~ì×4|–Íym¼ÀÓ¿ãéÿë„ßú-ª¥jYé÷°I4ÓZOQ¬ð;¼d*È"bP’8onµ[K†ÎãT¶‹P¹û5›H<ùpITïŒsŽHÏ…žˆôý¤o)-lºkÜím<%e?Ã-Bå6É­ÀÑÞHŠ@h¡*Täò¦6i8ÆN%q^^•áÙiž,¸’ìé–ö’ʼnnm>ÖUðÅHØœmèÕÆåoô[]KÅaáYô½²|ÈT`±BdÇ@99ç9½XÅÆ.ícËÀ×­NµXâSQ~ú“Ùiª¾ÊÖÒúÛr¯…¿äoÑëþýµŸkk5íä–é¾yäXã\¹˜à ž:šÓð¼N¾§ ÷"¤Ö,…†§4 ;“ ýÓϸéŸjX‘bÓRQŒ§k¨™CÇ'“ퟗÐÖ¶¨ÖúžÛO ^ÄœÇæ¡gý9ïr1\´ðОQ·¾­/UÛõóÛ¡ó3«(ÖOì½>}ÿCš«Z‡ü|§ýp‡ÿE­F¶wNê‰m33&ðJúý=êMCþ>Sþ¸Cÿ¢Ö¸¹e.ëªü™½Óš·gúôH.5k’¾J°nŽç;Wô'ðÇz§¨Z›-B{rÈ\I^ ñ튲·ÚY@"0M&á#ƒæ++óŽFÑ€1Üà“Žµs_»°Ô!‚x§V»PÕU°G¶GcüëµÒ¢ðn<Ëž:ï½÷_-4^g:E^ö|¯OKuù˜5jïþ=¬ëÿÑI-…ÄË¡⺴ª#oŸóÊüÜ–ûØô'ñ®ÜER¡I§8¾V–îú§ç®zôªMT—=ù^ºôò2ëvÏþE£ÿ_Ÿû%eIeq ²\IØßIažsƒŽ¸8<ûV­Ÿü‹Gþ¿?öJèÈa(cR’¶ŒË2’xwgÔ‚´5‡Ú€:ª(¢€2|Sÿ"†µÿ^ÿ趯˜«éßÿÈ¡­ׄÿú-«æ*ò³Š'èü ¾«ò ·¥ÈÐê–ÓGyœ‘H$K‰U™Q—HUby±÷âªQ^rvw>¾QR‹‹êzM¶£es­ˆ`Ö­n#¹ñ,…½ºE*° ì%£8høÎ>SŽzìWšx[þFýþ¿àÿÑ‹^—^^u78ÓoÏô>O1ÃGQF.úu·§D»ZÓ§ŽÖþ+‰T°oÝ®KsÓ¨àŒÕZ+ŧ7 )­ÑçJ*IÅõ.O%ªÙˆm¤™‰pÒy‘…ÜFqÑŽ0OsÏ@4¯¿ä!sÿ][ùšÁ­ëïù\ÿ×Vþf¾¯†çÎê»[áýO5/'Ïô+Õ›yaH&ŽI%_0…@GzB?­E}Iã–.¦IŒKÂÇ@víÏ$ç>µä¾?ÿ‘Êïþ¹[ÿ脯R¯-ñÿüŽWõÊßÿD%pæÂ^¿æ}W¿Oüó‰ÍV·†u }+Äš…Ì÷P¥´‹ û4a™ðFPå—®àO=zÖMäEò»Ÿ¡Õ¦ªAÁìô:-oYÒ/|?¦iº~Ÿ-¼–2I™‹ N.deäïb½2B€,1·±Ð?äRѿ넟ú>ZòÊõ=þE-þ¸Iÿ£å®<ÊNT$ß—èxyµÑÃBþkêﺓeêUÚ]C’<2@úRQ_2x&½Æ¡i$SåZw‘wK (]ÁÃÔg>ɦÚÿÈ*/úï'þ‚••Z¶¿ò ‹þ»Éÿ ¥}K^U±É˳<Ì}5 ;KºŠ(¯¶>|Іò­’=ò†Z%`›±œyèGo¼;â;#øaÞ5Ú¨ÆTc&Åt5Í|@ÿ‘@×ü_ú.ZÃü®Gÿ#>§–Õ‹Ž+èd–æ{eFÜ&·MÒ!‚£róœwëUè¯h~·%tÑÒ^øŠí-væÍ§u¿i~γģÉele€m£ËÈä†ÎF6ü ÷¯þ‘ÿìÕÈW_à_½ôÿf¬q²r£+ög—¡ 89¨ù~_×Ì쨢ŠùCåÍ‹-FÖÚÞin£ò´A‡ •#xÊðOnN~·<7"Kâ ¹#ݱÑÙwœœÉõ®n·ü#ÿ!YëÿЖ½Œ»:˜ªP{'úXàÅRQ£9.¨ì袊ûóæŒŸÿÈ¡­ׄÿú-«æ*úÄ‘I?…µxa¤–K)•Y‰Bu5ó·ü"Þ!ÿ ©ÿ€r…yy„[’²>ó„kS… ŠrKU»ò2jŃÇô2Ks=²£n[¦éŽAQ¹yÎ;Œu«ßð‹x‡þ€:§þÉþÂ-âúêŸø'øW„—Cëeˆ¡$×:ûÑ¿u­iWÑë1ÚÝê,×·2ßÇ ÖʱÆ|¹ƒ 
‰$8çìéÅWAaá}.¶‡©¨òe6’LléëUáñýuOü“ü*æ§-Z9°òÃPn1¨ºu^Ÿ¡“Z:¡o¥jö×÷ÒÜ}šE–4ŽaάÉ*ÙtãëRÿÂ-âúêŸø'øQÿ·ˆèªàŸáP£4Jøz‘p”ÕŸŸücO×F­âO Ú¥Œ6ÐØ^E>Y;™ ŠFþų’X’ÄàW#]W†ü7®Áâ"i´]F8£½…ÞÕ¨ $‘À¬¿øE¼Cÿ@Sÿäÿ ¹)É&×õ¡… ˜jU%I%e×ÎMõó2kSOÔl¬ôÍBÞ[9帻‡É­ÀEA¹|» 'r üà ㎴ïøE¼Cÿ@Sÿäÿ ?áñýuOü“ü*f¶Fõ+aª+Jk¾ý¾e믶]´ : UŽ5wÜÊŠ¡T’p>ƒ¥{§ÿ![Ïúîÿú¯*ÿ„[Ä?ôÕ?ðOð¯^Ôtë×ÔîÝ,îZg ˆ˜‚2}«ÍÍ)Ô•%h½ûzžsV5% +%.¿á3(«_Ù—ÿóãsÿ~›ü(þÌ¿ÿŸŸûôßá^Õêÿ+û™ã{HwEÁweºÞC=ÓKoÈØÂ§1 ýþÀð=‡n*ž¡ÿ)ÿ\!ÿÑkGöeÿüøÜÿߦÿ ³}§^½Â•³¸aäÄ2"cÈAíë]uj”špµšèû3ºpž’èû”«´:—®yà‘õ«?Ù—ÿóãsÿ~›ü(þÌ¿ÿŸŸûôßá\¾Â¯ò¿¸ÛÚCºûËo«¯Ù¥†(e "*’EtáBƒ ç‚9ÓRïþ=¬ëÿÑGöeÿüøÜÿߦÿ ³s§^µ½˜[;‚V›ƒæ9çB+©ýf¬eízvþò}ŒW²ƒ\­oßÉ™•bÊxínãžHÚO-ƒ*«íäŒðx§f_ÿÏÏýúoð£û2ÿþ|nïÓ…sB•xIJ1w^F²9+6¾ò[­B)ì"´ŽÝ‘#`P»†ÛÁÎ>Py''žÕzÏþE£ÿ_Ÿû%ff_ÿÏÏýúoð­›K;¥ðù‰­¦}«vÒ‡8Ùוîd¾ÚXÕ:©ìÖÖ<ü³Xw>½ÊTU°ÝÿϬÿ÷ìÑö¿ùõŸþýšûCÀ%-®ž@Ò.ªåx ‚yö#§z±çFÖ2A%cŒÅv“—N£'Ó¯øU°ÝÿϬÿ÷ìÔÐYÝnA¶˜(yùÖ€(ÑV>Ãwÿ>³ÿß³GØnÿçÖûöhaºH!cc(‘dÜ_Œ®qÆ=ýjõ…ÏÚu‹S‡ù—2>âxc×Ö¨}†ïþ}gÿ¿f®é6·êp¼–ò¢Ù,„òšé袊©ª»G£ß:1W[y ²œvžEywöÆ©ÿA+Ïûþßã^Ÿ¬Èÿþ½¤ÿÐMy|ÆRq©Wmâz³…Z|­­æoi?ۚij$çî£.I™°Oð®sŒ“ëî{VwöÆ©ÿA+ÏûþßãZú]õ¾qb²\Í.%˜,[‘ÄŠ§’ÛÆ=j?iCOÕ¼è”, ºÙ¿ˆu÷ñÇjóêSšÃ{HÉÞ/Þ×¾ÞšéêyuiTXOk ¾h»K^ûuÒÎëÔƒJÕu5‹{û¦F¸Œ2´ÌA‡šðŸøMã=Q_™â”£N)ÉÛg«Ýz÷Gæ˜Õ(R…ZRvÖ/Wñ/[nºtØ›ûcTÿ •çýÿoñ®#â?‰õûKIKMsS·Y4ýî!»‘7Ÿ0ÉÁäàŸa]Upä)¢ÿØ0éDõõ¾·W8q©ªä–úõGVAZ¤±R“~ï4hxa|]â/ x“Z-Öc:THßÈB2ï“’xE8ä°äsÈÿÂiâ¯úµŸü—ÿНJð<Òx/Æ:‹.¬¥Åä %‘NØæ3"»‰£'÷gj1W–SÈüVð—ü"¾2ŸìðìÓ¯³smµp«“óÆ0[¢ŒáJg­~·…­JXÉQœ¤¹¡¢Ûf¿ ë®ûXû 'ËtaÿÂiâ¯úµŸü—ÿŠ®ËJñ?ˆ$ð]¤ï®jm3jHdk¹ Û3œàn=Ï­yw:?üˆ–_ö»ÿÑVÕÛŽÃQQ ·ì»3ÑÉ=ìlµZþLét=Gĺæ·i¦Á¯jj÷m-ö·;TrÍ‚Ã8œgœT¾%¿ñƒâ+Ý0kú›¤2|öÉ (@eÏ#œž:æ£Ó-›N𼺤ú}üÐ^Hb3[¿•å$lŒ rŒ0ÎWœ X’+±ñžžÞ'ð&âH­§K»x”JƒÌxª2ùÁùFÒÇ8¯©Vñ1¼W³mÇeñZ[çÔúº•! 
ñ¼W#÷zoýiøž{ÿ ?ˆ?è9©ÿà\Ÿã]G†uÝ^ãGÔžmVúGK‹p¬÷J‚²ä ž3ù à«®ð’–ÑuEPI76Àß嚸¸ªœ!“×”NËoñ#Îâ¨F=yAYÙmêŽÛMW¿Ò/ïÿµ/m×(<öÃËgœð¿™#ž dljŸô¼ÿ¿íþ5Òé·Ù~#·°û%ⲯÙÁ 6È™?>Ò¹Ál¶wqÏ8¬i_Ùš»ùi‹yÿyÕzÁíèE~;ˆ§8áÕHIÞ.ÒÕïßôûDZT§,jÓ“¼_,µ{ïÓî)ljŸô¼ÿ¿íþ5ÎøçÄ:ݧ…ÖkmcP†_¶Æ»ã¹u8)!# ôà~U«\ÇÄù—þ¿¢ÿÑr×£Â3”ó¼$ŽöbÛ${†ci0ÉF,Ýv_¹{zQÇF„}œïè¾(ï÷íòGÞYòù£Ìá4ñWý ÚÏþËÿÅW¥|ñµ«xÒò KXÔ/!]=Ýc¹¹y7™Î‘œϽxÍz¯Àùï¿ì'þŠ·Î°ôc—Õq‚NÝ©·Î£h¢Šü¤î2|Sÿ"†µÿ^ÿ趯˜«éßÿÈ¡­ׄÿú-«æ­>ÎMGRµ±…•e¹™!Bç qÛšò³yÅ}ÂQÃÕ“Ù?ÐÔÂ×Òx>oc±Î±#©à¾OPªñžIé¶°ëÖü«èº–©y ý¦îk ûqVw6ÛÓ2ÈÀ1@K«’2à5×t‰´róKœî{y6†àoSʶ8Êqž3\µi(ÁN;mó=ì:u1(VV’´––÷_¯g£d:wü}?ýp›ÿEµT«zwü}?ýp›ÿEµ[ðÞ–Ú¶²‘ Yn£†7¸–Õ‰‘QKlÊò7=‹§ŠÉ'+$wT©\Ó–É_ó4uͤø/J×¥“罇ˆ‘ò« Ñ‘Œç*¬NHûÊ1Á®b½Á7Px’?ø~öÇSDœ´·åãE+1 ŒD»vÊ’ÁÏ“êšmÆ‘ª\é÷k¶{y 7Ž„d‚0Aî­«SJ*qÙþg–ãjT«S_ã‹¿ýºõ_vßqoÂßò7è¿õÿþŒZϵµšöò Ktß<ò,q®@ÜÌpOMhx[þFýþ¿àÿÑ‹W¼9e$ZV¥¬>òî—ì§ìß)@èþcï*ÃR œ AÈ Î1æIzþ‡UjÞÊs—[E/VäßøcþMql>7%G<œ†Èíó+`sÆ9&¹Úö ìá8øUÐZÝ‹Í8·ÙÆå ÕU•pWŽË 8ÉñúºôÔexìõFN.Ué:u_ï ùeòëó ö½OþB·ŸõÝÿô#^)^שÿÈVóþ»¿þ„kÅÍ„½Fqg¿/Ií¡§Xɨޥ´gnyfÆBÔÿžø¨%á•âaÑŠ°ÏB:Ö•œÑéðC)šXå‘ÄÀ¤{U%BŸ™zÙäb®xšÎ2ÐêVø1\¸Œœd}G·ozâúœe„u#ñFÍú=´òýO™öíVP{=½WõøõZÔ?ãå?ë„?ú-j­ZÔ?ãå?ë„?ú-kŽ?—ªü™»ø×£ý F’ú¬“ÛV4'?íº·øVmoC#i7(ö·a†8Þ\.á´©É Œöìi|S§ýžô]Ƹ޽À~ý»õúæ½ ¸(¬/<>(?{ç·Ý·[ïsš‡í¹e´¶ùŸü­]ÿǵýp?ú1ê­h‹f»}.Ýs™#Û3æ>N=‡5ÃB.jQŽí/ÍVoúÑŠºKõ"ØÃ€«ê¹Á?\ý:Í®¯FºVÕ&´{;˜Ä±á⑃*€Adžr3žµÏêvGOÔ&¶É*§*Ou<ŠìÆaa­Ko…ïñ.º÷]:laB´ISžû¯OøJݳÿ‘hÿ×çþÉXU»gÿ"Ñÿ¯Ïý’º8ýõz3,ÏýÝú¢ ³ue%¤p;ô•7ooËGo–`¬NÀ 6:à œ{àVÜJš¦•,(ìòÄw!eÚFy©÷•}áó‡?V-ÿÔ]×!ÿ¡­W«ÿê.¿ëÿÐÖ€!Di$XÐe˜€©5gQ²ûȈ6àP0?Ïõ›i:RŽéÁ{玽º“øV»Eý§¡T“̇;7ò[ž?Q@ý_Ñä-ü ÿA5B¯è¿òƒþÿ šëh¢Š¥¬Èÿþ½¤ÿÐMyzæ±ÿ Kÿúö“ÿA5äuò¼CüXzÅ?Ƨèÿ2Ä·÷“¼o-Üò³$«í)IÛ¯õÑtþ¾M'V“S:òis[¨1Fþx[¦ÜG1#,ûØ 1ËÌQ_¨V¤«Stå³þºÜöÓ³¹Óßß$^²Ò¡×’ø<ëq-™óÿÑ 0±†@€|ì\†;˜¨ YºÿÈÙ¬ÿ×ôÿú1«ÌkÓ¼Oÿ#f³ÿ_ÓÿèÆ¯/IS©ºó?½Ç±ôü4ïR§¢2« ²¸Ž \Z n$kÙd³“ÎÙ‡C¿ –ùAÎxPGÌ[ ÏÑ\õ)ª‰&öwû«œÒOÔÝñF©§saåÎ×/mfK;Hî%pÌÅpwB@àz‰¿ÕYׯþˆJóªôY¿ÕYׯþˆJüÿéªyuGe?ÑŸŸxƒ ¶ŒWóþŒŠ´´YÖÖé®~Ü-ž0 ©ß‰Nz6Ð~^2G~ò3h¯Ê)ÍÓ’šè~KJ£¥55ºþºšÌútÂÌiÆaPyl’¯ îc’sÉ9'Ž:}–|Qÿ¦‹ÿ`Áÿ¥×\Åù h¿ö úQ=}ÿ‡ULñÉ«{oût÷²J®®2si/w¦ÛÄájöŒ±¶³içjÙq¬¡èWc9Ü Ÿã玢û¼£Í»ŸVz=lj´é,¼Jnµ?6æò(à†{+ëÅžeHUÌ Ç2ž—y”€r‚¨èÿò"YØNïÿE[W 
]Îÿ"%—ý„îÿôUµyU°°ÃÁ(7«[ùE¯ÐõòI_ŸäŠ(¬O¾;Xuk+O°[ɪÛj?fþÍ=ÇÚvÛÌÞX<VU „&Üß7Ë]2X'ÿ„’[YKwÔ¢hËÈ|ò -É8Ç^}k…®·ÂòÕëæÛÿAš¾SŠ0Ñ¥“×’oeÿ¥&|·Ð<ŸÓ{/ý)3N§³ o"ßsöe ›”Ç9ç>ŸÒ ¢¿ ‹³Lü/•¦ojÜ“ÙÜÛµ×ÚùqF9 òIÈ\±;~b3Ãt¯:øƒÿ"’ÿ×ô_ú.Zéë˜øƒÿ"’ÿ×ô_ú.Zú®­:ÙöSïú3×Àb'_NSzëú¿ÔòÚ(¢¿¤O´=Ã>,Ò´;[k+ŸÃuQ,k0[¡<;ŒeŒL`;R?)Ã÷dËÀ ?ä{¾ÿ°dŸú6*òªõ_€?ò=ߨ2Oý|îe§†Àâ'Û’Ööýîm 7$}Eäÿ/®-íôËYdÙ£\ÚjBäH£ÊyÖÕº³âß’«žYA•üÀí= Å?ò(k_õá?þ‹jù¢ÖêâÊá.-.%‚tÎÙ"rŒ¹8#ž„Š÷ôûgü)åþÑóþÝýþ‘öŒùžgÙþmùçvsœóšù꼬Ãã‰÷ü“ÃÕOºü‹É­j±ßI|šâÞH»^ága#8-œ‘Àü…^Òîïu_[\_Þ}§Êǘ÷³ÄÇÊÎ/ÚÂpÄ€{óÛ5‡Ep)»ê}\èAŨ¤­·C¸ÖÅ”G5”zZF.5Ù1#Àé¾"ê§pÇÎ mëÃÕ½;þ>Ÿþ¸Mÿ¢ÚªSœ¹µ3ÂÑö+ÙÞÿð[a]ƒu(4]q5;‹Ï*9’yQJ))Úä)àŒ‘ÎÑJ2q’’6¯F5©Ê”¶z¿Û4[¿xE4/´­µ»Á Gp¿:·Ú‰'$wnã‘ôEkx[þFýþ¿àÿÑ‹Y5S—2¿õÐÃIQ¨à›z-÷ÞL+­ðö¼t?_Á¯äÝ_ìŽ 7ú!.».ݼ ûÊYÆ'•0›ƒº5Äaሇ$öºv¿×ݱ[¾SÖï/!e”˜ÄŒYöt]Ä’K` œžs^¹©ÿÈVóþ»¿þ„kÅ+Úõ?ù Þ×wÿÐyy³½$üÿÌð³˜(:0[%%ÿ¤‘=Íı,RO+ƸڌäŽ=ïo'_*K¹W”~¹5^Šðý¬û½|Ï’=ÁåÈdO²*óÑü$}Ä?‡Ó‘Ð{ðsuøùOúáþ‹Z«VµøùOúáþ‹Zê«_ÚÑz[Uù?ò2…>I¯Oò*Ô®B&r$9ùHúj:+;;›µucr}FÖQ|ÁÙLªª»%“yÂÎFsýìg¿›wÿÖ?õÀÿèǪµjïþ=¬ëÿÑ]“ÄNº“’[tó’f¥m%ßô*Õý"á,ïVæI¶¢}ä ÀÈ=:ÁÀ5BŠæ¥UÒš©Ѭà§ÔÖ¼»†MÚÙ' ñ•ʨ œ¸ŒƒÏSí=Ÿü‹Gþ¿?öJ­Û?ùý~ì•îäµ¥[K¤m÷v>š†¥ÜŽ9d…·E#£Œ©Á§%ÕÄ@ˆç•9!\ŒŸZŠŠûcç˶xa;ÌÈC£\®w‘Áç‘רüzT³ÊJfál¢M¡q»Ìû¼zVmX·ÿQuÿ\‡þ†´^Š(  ˆmíäI™\I±Ä+Õ‡B82jk'Y¦¹»ëG±½–ÙÎLmŒúŽÇñ¨Äã3*T!Ub*k£÷¥£ß¿oÉž(açRPpŽ›h¶3ÿ³´¯úhßø,ƒÿˆ®‚þÎŵ¦}7Ov39,öq1''’JäŸzÇ®‚æ3.©‡†¨RÆbå LTâ¢Ý¤“WºÖÎú–lõßßi××öÿc{{F¾Áo•Üp8ٓП` Qÿ„Ó\ÿž¶ø/·ÿâ+·ð¤Ÿa“ÂR½ªA¨Y?‹ˆÚX®Îã“ñ Ú—o a–>]uk5•äö— ²x$hä\ƒµ”àŒŽ:ŠòªSåŠ}ÿ3éðx®½j”Þž–kÝŽ±}v†×ü&šçüõ³ÿÁ}¿ÿ]µ“C¬hMé—W/ƒ$Ú| @H'Ø{ž¤×”שèò)hßõÂOý-qb±¨Q”èÍÅ÷M§¿‘žo•`(Ò„©P„[•´Š]d]µÐ¬o]Òß@Ñ‘ 4Ë|àÀ9¨?³´¯úhßø,ƒÿˆ®£Ã’Ëk4Dl\9W ꎊT ç#Œ÷ïYÚ͇öv¥$*1|ñÿº{~¹jcs(ác^8Š›ÙûÒõV×oÔù¨Ó ë:nòÑ|Ììí+þ€š7þ ÿâ+^ÖËODJ4­1PO!¶0…jdãn2p9ïè*…jÚÿÈ*/úï'þ‚•Ó’fxÚØµ•¥%g¼›ýLñôiÒ£ÍN)>éXbiÖ2H±¦“¦ba‡’à4³i–PLñI¤é¡Ôà°ÅÿÄÖ†Ÿ¾"×)°:ñö ÈÏR3ÆAÿxU½vÜŠõË(¹œqßÓÓÒ¾ÏÛÕþg÷³ÄöÕ?™ýæØ´ÿúiŸøÿY>*½}à >™ocm#ÞDŒRÊ0Ù)äÇn¿_S[•Í|@ÿ‘@×ü_ú.ZçÅÖ©*NM¯SÐÊR¯§J·½õOTýS9ËkÅz…ýí¢ÚÉ‚+ÎVÂß* ã™= ' “T?á4×?ç­Ÿþ íÿøŠïü¦Óo†n–Õìï­íq ¨üÑpÛ‹›Ä‹ˆö£ ¹ œ`e˜ëºDÚ¹y¥Îw=¼›Cp7©å[œeH8ϯ¥.X)/Ÿ©ö¸<&W_RƒÃSÒÎ>ìuŽ×Û¿æ‹ÿðškŸóÖÏÿöÿüEoxoW¸×£»‡UƒO»Š2މ&Ÿ¾aœlëþ5À×_à_½ôÿf®:ÕgJœ§NM4´kFtcò|ºžS† «j£÷^G[“¦Í*E…£v 
£û6ß’z:ãFÓín t-dC‚?³ ÿâ+WJó g¼ŒÄ$L,^c…²3Ô€FÜƒßæQÄö€´:”cä@~Aù±Ç žÞœqï\‘ÆfRÂJ²ÄTº·Ú•¹vï½ÿ O‘pêʛ„lü–ÿ×ârßÙÚWý4oüAÿÄWEàÛK(5yZÛNÓíœÀA{k8¢b7/UÞÕ‹[þÿ¬¿õÀÿèKS—f˜ê¸¨B¥i´Þ©Éµ÷\x¼5ГŒ~ˆì袊ûƒæÌ¿K$Õæ†FŽXì¦dt8e! Ð×Îßð”ø‡þƒÚ§þIþ5ô?ŠäPÖ¿ëÂýÕóyy„š’³>ó„hÓ ŽqOUºò:kCÆ:…ýí¦¯ªÉ‚+ÎVòL¨'Ž3“Ð’z 5Ÿÿ Oˆè=ªàdŸã^“àE6›|3t¶¯g}hÿkˆ]G拆ÜXÞ$\GµmÈdã,|Ç]Ò&ÐuËÍ.s¹íäÚ½O*Øã*AÆxÍsTŒ£$ߟ©í`ëЯ‰©AÂ:YÇE¬v¿ßù¢ý‡‰õ÷¸pÚæ¦ÃÉ”àÝÈy±¯­Uÿ„§Ä?ôÕ?ð2Oñªšwü}?ýp›ÿEµEkk5íä–é¾yäXã\¹˜à ž:šËžvÜô>¯‡RmÁZË¢ó:„Éá‰|B5LXG(Œ³^Èäãpò»ˆ\ŽÿCŒøJ|CÿAíSÿ$ÿôÏ \iš¨Ô<,Ú­Ö›sl©m jèèQ³ªÉPÌG™€NÖóËW”jšmÆ‘ª\é÷k¶{y 7Ž„d‚0Aî­jÅÆ*Qzzõ<ìZu«T£VšRVkÝ·ºöÝnÓ7ü7âMvéÍ­j2E%ì*è÷NUp žEeÿÂSâújŸø'øÑáoùô_úÿƒÿF-·†]^;›³³³ÅÍÁ™K!PÀ`8f*‡±»8 …)4•úÿ‘×:4)ÎrpZ%Ñw—âÍmükáè,&ÔuN5½ˆÉý¶MËŒeXÃ+Ç¿¨ cÿÂSâújŸø'ø×£Kh|mðÕâû|ž±¤±dšåäÎÒ0ùeÈ3"’IÎ<Žª²piÅ»3Ÿ,•X¥¤º„|Wú”Ò¤QÞ\—v £Î<“Ó½:âóTµ¸x%¼ºYà5¿ÆŸ¥y3ÞFb&/1‚Ùê@#nAïó¨¿â{@ZJ1òN ? üØãOoN8÷®8Ò­,$«)JêÝíË·ßÃSçà«*m+?ÏúüLí;ÿùþ¹ÿ¿­þ5fûQ½K… yp£ÉˆàJÓ“ßÖ³*Ö¡ÿ)ÿ\!ÿÑk\н_e/yîºú›:pçZ-ŸèY²›UÔ.–Þ Ë’Ç’L­…¦¡–ÿR†WŠKËèÅXyÇ‚:÷«Zd¿ÙñÇqöˆ¢’G‡ óF2ÈSÁ9ëÓh>˜›ÄÖ+Ò^ÃÀÉ+ÓwáÇ#Ÿ~k±Ò¨ðžÚ3|Éê¯Ñíýy˜sÇÛò8«=´ê·3?´ïÿçúçþþ·øÕ›FõmìÊÞ\Ð’ÄJÜŸ1Ç<úY•jïþ=¬ëÿÑ\tëÕ埼öïæåNŽ‹і펱ukqsÝÏ—Ë3 úãך©ý§ÿ?×?÷õ¿Æ·ôycµ»K . 
(Èb–&I9ùv“ÎÞ¼€=…`êvGOÔ&¶É*§*Ou<ŠìÄÓ©O °›}%®ÏËò0¥8ʬ¡(®ëN‚ißÿÏõÏýýoñ­›K˦ðù•®f2}«nâç8ÙÓ>•ÎVíŸü‹Gþ¿?öJéÈjÔ–1)I½S,ÊT.—T/Û®ÿçêûøjY¥Ô XšK‰À‘w)ó#üÿ:«fYV5 zžƒÜû ܼˆßhâ\/™nOÝ`A_À‘Óð¯¸>xÈûußüýOÿ Måц䛙‰‚2çjX·ÿQuÿ\‡þ†´-åë0U¹¸,Nrj{·Ô,¦Ms.⡆%'Нj¤Ê[p@ŠX¹ÏËØ9êGJÖÔ#úTwhÉ$± HÉÜwì¿¶Me}ºïþ~§ÿ¿†®é7Wjp¤—ºÙ äƒòšÊ«ú/ü… ÿè&€:Ú(¢€2|Sÿ"†µÿ^ÿ趯˜«éßÿÈ¡­ׄÿú-«æ*ò³Š'èü ¾«ò6u ^ÊëS³½µµ¼µh$b/b#TE(Â1±°py9ÀÆ+WÅþ2µñuÍŒÒèæÙ­ÉY;€ZXÉnvqŽpyq⹚ÖêâÊá.-.%‚tÎÙ"rŒ¹8#ž„Šâö²³WÑî}+ÀÑæ„Ô}è&£«ëóüïcºƒ@Ò´¿Ø›9§’âßXÓÓwš²BÑÍ` ±Ü­ÎÆ8®‰HWV*’§8>ÜWá½kUŸÄšE¤Úä–Òj0³Âó±F&`Ä•'îçëÍwµägRMSq]ÿCç3 uiÍF¬®ìµþ¼îˆüEoE¼"@ûÊÛKg9<篾8§Ïzu]J9.‘v¶6ÁìI9Ç_NÕB¤†y­Ü¼2¼lF F*qøW“Dß»'î·w¢Üó(î–¶±núÒÞÙeU¬‹;"pC , Æ8ÆÎy9ô8Óžo#Sº-/"᳎I¡« în%‰b’y^5ÆÔg$ p0+fûþB?õÕ¿™¯©áéÂRªà¬½ßý¸ñóHÉ()>ÿ ëK÷²™åŠ(÷0ÇÍ’ôÑGu%ÃIò¶Æ‘pØœ`ƒŸÏ¦j­H“ÍlI]W;°Ÿ_ÐWÓA%Ü1Å䘉Û$að[v9#Àô¯(ñˬ~8¸veUKbcrv°òcààƒƒìA¯Qwy¼ŽÎÇ©c’kÊüÿ#•ßýr·ÿÑ \9‡ð—¯ùŸU øÙÿþq+Ìž(OEagÒÈÒ´kæÞF$– ¹9ËtrûÍ]|U⨯5H €Ü²E1†c çC³>ý |¹ã^™æ°*Åž¡{§Lf±»žÖV]¥à£×§ò¯)T{=·>òXJkß‚÷’åNïn×:_x.o ÛÃ;,¦9îçÁbR¾Vâ°Þqè‚+¦Ð?äRѿ넟ú>Zòó,†„ÈÆ%bÊ„ü œ@õ;Gä=+Ô4ù´oúá'þ–¸s7BN*ËOÍFgN­<%8Õ—4¹·µºKÍšÒ]y«n¦€vŒnùÆsÏ>¤ôÇ_¥YÔµ‰õDgŠ1’Uyê9?Oʳ©U™]«)È àƒ^ÄTåp¾’µþ[}Ç€éBêVÕmó5äÓ-~ËçBá™­Ä¢#p7½ŸàÃ`@Á>ÔË_ùEÿ]äÿÐR¨=åÔˆè÷32¹Ë†rC:úô•_µÿT_õÞOý+ÜÉêSž:>Í[FyØèÊ8wÌïª'3æØ@bO”’œäã=ñØv©ÿ´äûØü¨¼¬c¾zç=}yªTWÚ~ÚÎ ìšáäòü§òÙÈú‘Îyïé\‡ÄExl¡@Â=J T’AÂKÆF?¥t¿iœ>t™A„ùËôô®[â ð–æ$±Ô"$ž§÷rÖŸàÈõr?ùRõ8Wñ·‰£×FŸf—+7žc_3ËywÞA|ç$޹›Ä>)ŸÅ:•­æ§g´*#o²–Fxóœe‹ylwç±;»f…‰$䓹kÓËêÒ–"Œa4õ}ô8ñPš¥QÉéc²¢Š+ïO›2|Sÿ"†µÿ^ÿ趯™buŽdveU`LnNÖ‡b }5âŸù5¯úðŸÿEµ|Å^VañDûþW¡WÕ~FËø‰ÛÄÑë£O³K•›Ï1¯™å¼»‹o ¾s’Ç\ÞÔõù|oâ ÕVÂÌŒBÓ+Sþ¸Cÿ¢Ö¶­Rœé?f­ªíÙÿ_ægÊ3\Ï£#–êY¡ŽÙ²1…Äj÷=ÍY“R¼¿H­&´[”*G äv@òªW:¯Q_Þv{êõ]8>‹C~çM·Ž+—UˆlL®èeP=Io•½›ƒòã­f\1HtöÈ„‘ÿXýZ§V®ÿãÚÇþ¸ýõÓ:ð¨¤á.ŸâFQ§(YJW×ôbû†¼[²SÏSÂ%ú‘ŒÏZ·lòjú˜kÂÒ‡%`-Àé˜=úý+.ŠÆ‰©{íÉ^í6ìýM%J-{º=¯ØÚÔì!¶Óc• U¸êÌAÎâ6“Ã)÷ç$ÓìÿäZ?õùÿ²VnÙÿÈ´ëóÿd¯k'«¸õ(Ç•Yÿ[#ÏÇAà ÓwÔlyòÑò¥pÙÇ#¡ªÅž¥-ˆq q|ç$°'è:Õ*+íOµ <3ÉòoPJÆ$Ú@ëÀ ä®x©–×MyÒFC ÙÆ=†:ôçµRÈ"1 FNJÁü*xå’h®šYØBXäýõ c™âIvâA†ÊƒÇãÒ¥‡P¹‚ n¢3œ®Å9Ï®G5ZŠ¿ ¤ob®Ì¨Z@»äFÆTçiú}}*Õ¤)µh‘Ž×R£y<ñíô¬j¿¢ÿÈZøþ‚h­¢Š(†¶‰&¨¤ŠÖPÊING׌ÿchÿô ‡þþËÿÅ×´kò¿ÿ¯i?ô^G_3ŸUœ*AEÛCæ³ÜÛ©á+Jšk^Y5¸· 
ø+KÖå¸Q§B‹dîóeÁs÷Aùúg$ðz{ÖGö6ÿ@¸ïì¿ü]veõ¾q`²ÜÍ'ïe äa*©äîBqƒ†¯ÏiŸaÕÌè?su™³ëêAüqÚ¸ª{O«*–±~÷ÏoK=z¹Þqõ5VºœÑv—¾úí×K==u0tM'J_Óž=:$uºˆ« $8!†-Šó_øYšïüó²ÿ¿GÿНTÑÿä7aÿ_1ÿèB¾y¯Ð<;Àa3*x‡¥œ®6æIÚ÷½¯ÞÈëɳŒÃNR¯^riõ“©ìžÔ»žì±x…ÕG÷²ßü,ÍwþyÙߣÿÅW{¯xËTµñ§oÛìŠîT\¡Î;׈W­ê6qßxûUŠb¼¹–b‡æòл¾Üñ»jœgŒã5ىɲÌ,Ó¥BM6í¯kvõg±’Kë›Ä{É.ºN«y®i^ÒõÉ ¶ûÉxŠ}Õaº<`œä+“üJ1Á®kþ­[û–ß÷ìÿvÞ¼°ñv›¯h}<­vÏv‚[Sˆ³d•Ä­•WÚÛr9'®N<šXÞž)« ô#­yØ6¤§N­5ÍÚÚ=WÝ·Èö0”(ÍÎH.dût{‘ÑÿÂu«rÛþýŸñ­½RÒÃR¹†öîÂn'´¶w}ò “ v yõz,ßꬿëÆ×ÿD%|¿Ч†ÁS•¨·.žŒù¾4¯W,ÁÓ«“§'+7âÚ³vºét‡Øø;J»Ñoõ¦Ä«nAæÉ†l‚ÙùûúŽx¬¿ìmþpÿßÙøºï4‰m`Õ—J’i™LMe4F?‘ŽX“¸6~ñ`Þ‡õÌê6/¦ê3ÚHrÑ6þðêâ0kó¬K© Pœ_“õßúô>žç4èB¤1u4¼eïËâß¿geèdÿchÿô ‡þþËÿÅÖOŒJý6–M“¬{¡S O–KÝ÷Ö7R[júùl};Åâ9.ª?½™_ð³5ßùçeÿ~ÿ]f™ãZoÚݲ[yqÄgXà#¿ûF¼†»þDK/û Ýÿè«jïÄdUYRÃÂ.û¨¥Ñù¹MZ˜Œ\iÖ“”]ônëfvÚ‰5Ís[´Ó`[e{‰6–òóµG,Ø,3€ ÆyÅKâ_jÚˆ¯tÁögHdùi$¡—=9Áã®kGk}?HžòâîæÙï$û<2[D$dòÚ9Xàº`çÊÁôn:ëüiˆ¼+¦ø¶Í¼Â‘ˆ®aL©lgic²d`gïu f¼Š”0ÐÅF.šäwŽßk¦¾z«w>Št¨Cà¹^›uõü,rßðjßܶÿ¿gükZÇSohɪ[A2CuE—’³sÃgµpuÖøOþ@š¯ý|Ûè3W`ðôršÕ)A)$µKÍ|EàòºØŒ2äœR³Z5ªÙ£{MðV—¤_ßÿgB«n¹AæË†#–ÏÏžó$sÁ¬ìmþpÿßÙøºït[ë}7[‚ÄÜÌ쎆,£>âs¸7÷Éíèyîk ÄgöN¯, 1 ~ò/÷On§¡ÈçÓ5ù#ÚF„jB[i/]ÿà|Í1YÞq,*ÓÅÔº|²÷äõÝ=þ_#ŸþÆÑÿèýý—ÿ‹ªõúøS@kÍ"ÊÞ)¤ºŽ',]Á]²íêm×1ñþE%ÿ¯è¿ô\µÝÂöÅfô(W\ГÕ=SÑîŽ|¿ˆszø˜S«‰©(½ÓœšÛÔ©á¯ø—ÄÞ$°Ñ­–Édº”)(ˆ9fÁqœ('ç©ücâÝwÂþ.Ô´aö)#¶—÷lP’c`2r>m¬3ŒçÍø}í4­çP»¾½³“P—ì–óYÂ%xü–Šg8/ÖÜ`ÚÁGÈèkÐ>*XAâßhþ<ÓŸÍ1İ]·–cÊ+¬çnÙw.ìïûÄ.kö¹>QKË OÙÊñøźÖÝu»£ì/âÿxïêÎþf»ÿ<ì¿ïÑÿâ«Ð¾øÃRñ‹®í/ÜF– òЃ‘$cÔúšðºõ_€?ò=ߨ2Oýtæ¼?•aðu*ÑÃB2KF¢“^ŽÄG^o–Smz³èÚ(¢¿:: ¿HÐø[W•B–K)˜@ˇ¨9{+çoøHï¿ç†—ÿ‚«oþ7_Cø§þE kþ¼'ÿÑm_1W—˜I©+3ï8F9Шçõ[¯#¾ÐlµMkÂúÖµ³K,cÌ@év¸‘”pvO ÓŽKx5ËÿÂG}ÿ<4¿ü[ñºï¼'«Øè>/´ÑÚþé^(ÿ³%„Ûf'”HÍâLŸÞ³%8VÁÇ-\o¼=ÿ׊.l£\Z¿ï­¹ÿ–lNRx ¯<¹ïXT‹TÔ¢öÑêz¸:´ç‹V’懻m6koG×q¶!½{‡ 3ýL§2Øthê¯ü$wßóÃKÿÁU·ÿªšwü}?ýp›ÿEµ?FÓ$Öu«=6-Á®fXË*o( å±Ü’}…`¥7d™éʆR”’¾ËÌï!Òïdøqs¯ùzy¾ŽQ'šM³m‡¼¾P*pÞfy6žù®/þ;ïùá¥ÿàªÛÿ×£x#S‚Oßi2èz©¼¶Ïmrë,qÇÔÆ¬!Ú ';†rNGžx«A“Ã~"ºÓ˜1‰[tßÇåNp2{ dÚ·¬š‚œ_“õ<¬ºP–&¦¼UÝ¥Âúi}Ÿß¹£á½~òoé4:pW½…IM6Ý[ÇB}Ç5½©O2C ¦$²0TDÒm‹1<“Lð·üú/ýÁÿ£®xBÆâkùõ(,n¯›b¶S¹¤.0pÀIqAòÈ ŒÖQr’JÿÖ‡}jt)JspZ%ÑnÛKïv:_ØÜøoLѯ-F™qÔžTÒíž?7²¯åµ;F:&yæ¸ïøHï¿ç†—ÿ‚«oþ7^“á[[Oü;Ô4(-. 
Ki ´’éÒEY̸ŒwpHWëÈÈåŠH&xf£–6*èã ¤pA¡­+ÝZqz3“)ösö˜zÑNtÞº-SÕ=<´ùð‘ßÏ /ÿVßün½{Q¾™5;µ o3›xÉê{•æ¼.½¯Sÿ­çýwý׋™Õ¨©+IïßÉœÙÝ Q>X¥¤º„±c5ÍõìVÈ–ÀÈØÏÙbàw=; Óõ)f°Ôf¶ÛlB7Ê~ÍCÈÏË×T6f;{GšIeŒÌÞZ4k¸¥Xž£;psëíZúüI¨iVÚ´'qU !Û·Œã¡^ì@ìñÇÔP?Û%þì÷á?®é7/&§ •ˆ»îÄ ýÓÜ Ê«ú/ü… ÿè&€:Ú(¢€)kò¿ÿ¯i?ô^G^¹¬Èÿþ½¤ÿÐMy|¯ÿ‡ÅqOñ©ú?̱-ýäïËw<ÌlòPúŽxè*ÄM¨]ë¨Ü PÙgvi6ñØsÉéŸzÏ¢¼5VIÝ»úõõ>j5æ¥vïÝ;Ùú\Q[®¡û“`È5hä…âtÞP±@Îvƒ·Œc©×ËðO-´ñϯÑ0x䊲09È ÷¯ ´ù Ø×Ìú¯žkö? ª*°Å;[XÿíÇ×dµ•Zr’VÔÒ“Ä:Ôº”Z”šÆ ÷ñ.Èîšą̊¼ð9æn3ÜúÖÿ„þsy}s÷s޼ýj_íÝcìŸdþÕ¾û7—åy?h}›1»sŒcŒV}ÎéAî‘÷^Î¡ÛØêþ¶ÑôÈíƒ&¡ÊÄÞ£˜–wn—tM¸•6Œƒ‚pFã&•42·ˆç³•ÞÔ¢xdfbÅOžT’ß6qŽO>µÂW[á?ùj¿õómÿ Í_/ÅhÒÊ1M»®¯¼“>gŠ(*y>"I½ºÿ‰3m¯ïánîs: ,†C¸x9î:žás{jó]\ÛFI+ænO gÚ¨Q_‡ª³O{þOÔü.5æÛ¾·³Ùú÷7¦Ö9õaÃÉo;IåFAPÍ!˜€yA=È8ë:øƒÿ"’ÿ×ô_ú.Zéë˜øƒÿ"’ÿ×ô_ú.Zú~­*¹îËùŸäÏO/ÄN¶6›—ŸãsÎÿ¶µ_ì¯ì¯í;ßìïùôóÛÊûÛ¾æq÷¹éךŸþ}ƒìÛšŸØü¯'ìÿk“Ëòñ›s¸ã1YTWôc£Mïßn½Ïµ»=WÃ~$ð~—¦hfÖîk[Ë8®¾×änij$Hì“Ä¡ÆÒ‹»JîËhü"¸Šóãˆî`¹{¨f‚æHî$P­*›ˆÈb9Æ^‚¼f½Wàüwßö “ÿFÅ^i… z©¶Ü^þ·íݿˢ¶°“rHú6Š(¯ÌÓ'Å?ò(k_õá?þ‹jù–)d‚dš9c`ÈèpÊG ‚:úkÅ?ò(k_õá?þ‹jùм¬Ãâ‰÷ü¯Bª}×ä^}kU’ú;çÔïò5Ú— ;sÀläOæjÄ:æ©u¨Û=Õ×ÛäMÉêRy±¡q·?¼;F89iDU/–ù¹Ûžpyè:W®jò¼ÿ®ïÿ¡ñJö½OþB·ŸõÝÿô#^^lïI??ó<æ ŒÉIé$_i¸û?Ùüù|Ÿùç¼íëž:ÓþÝwåy_jŸËÛ·g˜qŽ˜Ç¥W¢¼5Vki>ljÉÆäsG3à ŽÄ2)’T"% ¸ü±Œ@9É#'¿7Pÿ”ÿ®ÿ赪µkPÿ”ÿ®ÿèµ®ªµÝZ.ýüŸùš…M;?Ì«OŠG†T–3‡F §éL¢¸Óiݵ}¹&§ –“#ÌÅÄB%u–@î@ä~VRÄõÁƒq§ û(”‡íh÷1,¡ #¸lç{päcqÀŸ¼=ÿ׊.l£\Z¿ï­¹ÿ–lNRx ¯<¹ï\’ÃASSQ^z/—C¾†G‘TÆÏ,%;Zñ÷V¶mJÿ=¼µ6?*§] ñyÚ¬é£ÍrÍŒàI?€Íwä9ÞeŠuzó•­kÉé{ßòF8è,+‹¡îÞ÷¶Œ©|¡Ce Óh°m‘:~=?å§~j¯ü#ÿ ,÷þoþ9]´5+;›_6& —ˆ.~AØr£ÓŽÇW:A‚0Gjú?¯â¿çì¾÷þg×1óò_{2ÿáð÷ý`ÿ¿óñÊç|SâOì}zM>ßHÓÞ PÈf-%H:tü;×m^gãKY¯|}-¥ºožqmk73C'ަ¼¼Ö½Zô”jÉË^ºô}Ïk"ÃP̱¥Š©í-RwZëäÙµö©'ƒæñ'ö&–-cb Dà•<É—ªñžIé¶°ÿá7›þ€º_þGÿãµØhÚk6·…-µ‹ ‹k‹EbHÊÉ[kDÎÃÌ` î#9-^S,RA3Ã4m±±WGe#‚= xÕhRŠN1_rÜöòþÉëN¤*á ši¥Éou­7]ÓÂo7ýt¿üÿÇk¤:n‹â}#IÔµMÒYÚÙmšu ¢ip}O9<ú`2¯SÐ?äRѿ넟ú>Zæ©‹­ƒ­…—³–׎Ž×]Pó>ʰ´ã<>1mÚé%¥›·à‚Óá߆.ìî®Sö»mÔ1i¹ù½å¯` ü½jŸü!žÿ¡rÛÿnøíwúÇk*ZÍ48¹L¼.rÀìAÈí‘÷±ÔV6§bÚv¡-¹ÎÐr„÷SÓüúƒWˆÏ³˜a¡Z8ª¥ï½뮚~GÏÓÃa¥VPt×u§C™ÿ„3Âô.[àMÏÿ­Ë? 
øz=Ea[™]cÍ€ÅcäÉžB¯|qõËkV×þAQ×y?ô­2~ Íq8¥ ؉ÉYï&,f–Ÿ´£YwZ2„Ð.'HcÑ ,ç÷óÿñtë¯è·2@Ú,¡Æ|ù¹úÏJÚ³ýÌm?š‘9;cfà‚ <Ûßq«ÚÔ <êm!€W‘ìz~ûW×ýÿ?e÷¿ó<¯®b?çä¾ör?ðŒx{þ€°ßù¿øåS×M‡…¼7%Æ›¤Z5ä(é$“2Ÿ’RúÌäsß¹öÇA\×ÄùýÅÿ¢å®Ln*½\<¡Rm§Ñ¶×ÜÎܺøÌU<>%óÂNÍ7týQKH¾Õ5UÕ Ñ4±ŸrŸ÷‡«L |«–<çîŒsXð›Íÿ@]/ÿ#ÿñÚì|wm¤êðxzãQ°h%­.ìdW®K1læ=ŒF|¯¿† §Ÿ•GŸx«A“Ã~"ºÓ˜1‰[tßÇåNp2{ dÚ¾z¦œ`¤¢»=çÒ`ò,–®*¥ ˜H%¤£î­c·núüÍøMæÿ .—ÿ‘ÿøíjiw–^-µ¹´Õt;‚'ŽUXä~l0É>fzù×]~õÿÒ?ýš¹§VXhºÔ=ÙÇf’Mz4vãxg'¡BU)a¡+Y¨«îtzwÃï j7©m‡m—<³}¢ä…©ÿ[þN*¼¾ð¬2¼RxnØ:1Vj¸àŽ¿òÖºí2_ìøã¸ûDQI#‡Ã†ù£d)àœõé´LMâkŠé/aÁ‚àd•é»ðã‘Ï¿5o=Î^ÚÇS™=}÷³Û¯õså– «ò:jÏm:­ÿ¯#ˆÿ„3Âô.[àMÏÿ®§À~Ðô­ryôÍ&IšÙ‘¤I¥rWrœa݇P;gŠ¡[þÿ¬¿õÀÿèKX`x‡6Äb!F¶"r‹z§&Ó/‚ÃÂŒ¥$ÑÙÑEõÇÏ”µù_ÿ×´Ÿú ¯#¯\Ö?ä ÿ^Òè&¼Ž¾Wˆ‹C⸧øÔýækéºßé÷ûö­ºåf#–Ï~ó$sÁ¬Šìt[ë}7[‚ÄÜÌ쎆,£>âs¸7÷Éíèyîk ÄgöN¯, 1 ~ò/÷On§¡ÈçÓ5çâ0±Ô‡M%ë¿ü‘åb°qŽiîŸ,µ¾»§ú|ŠV¿ë›þ¹Éÿ ‚§µÿ\ßõÎOýÓm k«¨mЀòº¢–è 8渒m$»ÿ‘ç$å—ò4—Csá™5røÄ€*ž…3´Ÿ®â=:¹‘]·‡ïìn¯®4ñ4ïÔB5†X¶ð©·ï<•œ ã·¹=FÅôÝF{IZ&À?ÞAüF vb°ð(U§¶Ï®«üÖ¶èzÜ$!Fj;|/[û˯Íj—Aú?ü†ì?ëæ?ýU*»£ÿÈnÃþ¾cÿÐ…;I çÚä$% £'!€^23óÈÈã<×ZxÉu=UÇQÇ`yÁ¬Ší¬¢‡^ð¤Ú|3I4ö„4FHö0ë´}â9ù—¯·ñ5¶:„i¸Îš÷d´ë¶_ÔèÌpÐ¥(T¤½É«­oªÑ«õ³ü¼‹ÆŸò=ø‡þÂw?ú5«×kȼiÿ#߈ì'sÿ£Z¿Ið³øøŸHþlõ¸sj¿öïþÜAá­ x’ÃF¶m²]J¾؃–l3…ã<ã?Œ|?ÿ¿‹µ-IæGm/îØ¶I€dÉÀù¶°Î3œUヲ• \êw×¶rjý’Þk8D¯’ÑLçãÚÛŒX1èù zÅK<[àmÇšsù¦8– ¶òÌyBÅsµœíÛ.åÀÝÿx…Í~™W:XèF_Õã·ÚÝkç¬mÝR£x¾çŠÖç‹?ä1oÿ`Í?ÿI!¬:Üñgü†-ÿì§ÿé$5éKøñô—ç:üá&ñŸŠàÒ˼Vª­5Ô±ãrF¸Î3Ü’«ß³‚¬=SO—IÕ¯4ÙÙkIÞ 2J–F*HÈ•ÖøQ´ð®•¦]K{kyuršœfÚÔL­M,*ûØÈÉ3††Rœ˜“ãf…Ói¾4Ó[̱ÕbE‘ðF_fcl1ÏÍÆ6Œlç–¯7ëÓŽ`©Oà’iiö£¾¾zõè_/¹~§‘Öæ½ÿ  Ø1ÿô®æ°ës^ÿ?†?ìÿúWs^•oŽŸ¯þÚÉ[3WÃ>ŸÄ ñˆ|ß*=6"`²º€òÔŒGÐ`d¸ä`×^Õà}wMðÏŽìtÔïVHb:<ð5¦ay„®Å„‚\Ÿß;…cØ rÕçŸ|9…|k¨iǨa-¸ôÆà¼’~\•É9;sÞ¼ì:sÅÎMšRŽ–Óf¿'×r¥¢š9ŠôïÿÉûOþ“ŠóôïÿÉûOþ“ŠÓ5þ?ÄŽü£ýö¦Ut^&ð³øvËG¸iw›ëo1ÔõI €eÉÉñÀ¬­&Î;íJ8¦, Uyf(~o-»í϶©ÆxÎ3^d¶þ9ð ö—ou5Í팞th|¦ä¢ÿ¬e üè2Ü qÀ'ÅÆâ¥‡œ%öS÷½‰ßÉŸkŠÄ:3‹û7×ÑèŸÞy%jé?ò ×ëÅô¢Ê­]'þAºïýx¯þ”C]•¾ê¿4tÕø~kóF}­´×·pÚÛ¦ù¦‘crXœ“ÇS[~1ðßü"ÚÚØ¬þtoJŽx'<6Go™[ž1ɪ:,É5ÔóÈÑÛÛÛ;ÊÈ›ß ˆ†Õ$wH½Hã?CèzÔøÃᤵķWšFàòKÝT ûr3·k“–Î8Á$)ÿ‘CZÿ¯ ÿô[WÌUôïŠäPÖ¿ëÂýÕóyYÅôþ_UùšŽ°š•ݽÃé–qU’#.ÙQUU²äà*òzóžk£¸ñ$Þ?Ôì­u¶Ó,â·2Iç=°l•ÞD€t•íŒäŠâ*Åž¡{§Lf±»žÖV]¥à£×§ò®(Ôië³ÜúJ¸*rŠp^ôSQwz_ï:­ J´·Õíd1ÝC¨YøŠÞÕáyÑcfl À 
Î6Œ˜‚±GVÈ9Èõ¯?𶫨ÿohºÛî¾Ãöø?ѼæòÿÖ«}Üã¯?ZïëÇÎå¦ãçúi Ƶ¦ï§áýzú²É¿¸kÅ»%<õ9 "QÏ©Á<õ«ö…õ{™¦½ewHÀ ð¶Á鸡~§ŒgÒ±è¯.ž&Q—¿ï+ݦݛîÏ"t“^î¹zkc•’Úy— ó/=rqÏÇ8úgNâgƒRºhöäÈã• Á'=kž­ëïù\ÿ×Vþf¾›†æ¤êr«YGõô<œÖ-r]÷ý Ùí¤y!eF~¤ ý8ãð©ü6Á§Û™øÌl3“ü$§é_J¡E}Aã–¯£Ž'ˆ"c.€µ²xÁ$ú:òÏ_\i¿f½µe[ˆRÝ£f\)ò ÈíÇšôšòßÿÈåwÿ\­ÿôBW=Úšk¿ùŸSÂQRÆÎ2WNó‰gâ GNÕŽ©fðAxWhxíb £ùWnÕ8@“êkgÃöÃÄÚ¦§©ˆ¤›ËÞÒKg'Ù÷Ÿâ‘ dòºX‚1¼žG<•åFm=u]¾­…Œ¢ýŸ»&­Ì’½»zŠ´}iImwkv&´ß,öÓW“Ìpp2NÚ à´÷ÈŽÿ"–ÿ\$ÿÑò×–W©èò)hßõÂOý-pæM:i[oÍNm à N3—3æßå#`êæ;ŒÆ%H‰1Àãã·¥N·3êú…ºÝ±“Xwuè¸$~l¥¬Èÿþ½¤ÿÐMy*;FêèÅ]H*ÊpAõëZÇü/ÿëÚOýב×Êññaè|W»V§èÿ2Ã_Þ5ÂÜ5ÜætY ‡pðsÜþu,wš…ä 5æöVùZî\¢z“»ŒqÓ¿½R¢¼%R][·©óJ´ï«mu×s¤ck;¢#°FV ‘yŒ¾K+!8…m£¡c×®z¥·”K ‹Ñш#ñ%¯úæÿ®rè ­+Uç´–›ÿ] k×öœ³Z=ÏȰ·÷‹p× w8ÆA!ÜG¡9ÏaùU‹I.oõyîв)Ì—n®ú!Á<ð=On£>ŠˆÕ’z¶Öû™B´“\͵{ît[,F¹¥½œ‘—’ì<ѧH˜º€‚£ #ƒ‚kÞòêÓwÙ®f‡v7ynW8éœTÚ?ü†ì?ëæ?ýU*Ö¥W$§5{z#zÕœ¢ªGÝw{zD±ý媷»ž'%c¨'׃Wô¹^5¹¾—ìwÉ1\˜Ù¤l~c»98äœR2(¬éÖ”wÛc*Uå &ÛvÛRÍû«Þ10v¨o,(]Á@lmÆsÓõ¯ñ§ü~!ÿ°ÏþjõÚò/È÷âû ÜÿèÖ¯Ôü.—6'û¨þlú.—3­'Õ¯ý¸£ýµªÿeeiÞÿgϧžÞWÞÝ÷3½ÏN¼Ôÿð“øƒì`þÜÔþÇåy?gû\ž_—ŒlÛœmÇéŠÊ¢¿^ti½â»í×¹ô—g§iö†Ëû+Ïo Üý†&’ã3éßéJÞVÛnO,6Ó6ß&ÝØ_3ñgü†-ÿì§ÿé$5‡[ž,ÿÅ¿ýƒ4ÿý$†¸èáÝBmÞéô·Tû¾å7tQ¹Öµ[Ûl.µ;ÙìàÛå[Ë;4qíTœ @Çj³>½¯ëj–ºÝõÔRÈ GwzÆ=Ùà’í´}N¬Š+³ØÓé¦ÚvzŽ£‘‡©-ÜÚLwñÙù){`4Ùaºp]¸›;ÄaÓ]‚í qÚ¼òÛiÞž ^)¢ÓÙã’6*ÈÂòà‚ä{×?[š÷üü1ÿ`ÇÿÒ»šâ§…ö.)»Þ]¿ºýÉtH§+•dñµ.¥¥&±¨=üK²;¦¹s*/<Î@ù›Œ÷>µ££Üê~ ñ$3ßë¼°ÄÄÜjòÅ*ˆÆ~Uk±Üv© dç*aÎQ]s¡¢’v¶Ëúù™é^ºýûþ.½ á~˜ðhÒønóH½Š×^Óyõ$·n÷ŒJ¦ß$£ŒŸ•™ÁÉm£Åu&ïAÖnô«äÛsk)ð ŽŒ2*F8äkìpœ=b1h}Ví­æ¶¾ýïøıø´“öŒê?áhkóë§ÿß·ÿâë¼×¼k©Zx‡S¶Ž B‘]Ë–FÉÈçæ¯ ¯S×m¦½ñ¾§kn›æ›R–8× eŒ„“ÇS]Ux{*ÁÔ^ÂŒbšwùZßuÙëäÒúÜçõ—Ì’Òý y|_¯Ceoy-ª[Ü3¬.ÈÀ9\nÇÍÐd ôÎGcUÿá=Õ?çÞÏþøoþ*»kí&]wáýÞ•sk&‘9k5{wŒÝF»¶¶ ³)lÎüد ®L‰R½$švùtüc ‡Â×R½4šðßÔÿÂ{ªϽŸýðßüU\Öô‹}cPQ¸yVk‹Kic (&ékНE›ýU—ýxÚÿ脯”ã¬=,&œ¨G•¹tôgÎq~*¶Q…§_/—³“•›]¬Ý¾ôŒ1àÛsjn³uäïܸÜA8éè?Îj/øE¬ç­Çýô¿á^©œÇÚ†ö)£hãYü¦kŒ3Œã†ê1Ž™®¿9Å{j ™ê¿êŒÆqF†P[Ÿ¼µõM¦¶0ÿá±ÿž·÷Òÿ…Wñ?Š/|!¦iñ[ËØL»®™²g›=ãÒºJà>(ÿÈSEÿ°`ÿÒ‰ëé¸ G2ÌÞxr·gµÓF¹OæØÊΊò’Jêýî•þæÉí¼}â‹»ÛØ4Û'µ²U{‰Do¶0Ìrwõ$ð:ðO@qSþ†·ÿ>ºýûþ.»oƒÚ|’hZ†“{¦Ý¥—ˆmæP†2T*ƒÒáJ©9”Ä`¨àïògI»Ðu›½*ù6ÜÚÊc|c£ €J‘‚9ý; 
ÙlM\;ÂÂñµ¼ÖϯGu÷E,~-$ùÙÔÂÐÖÿç×Oÿ¿oÿÅ×W¦x÷T›ÂV·mof${눈ØÂÇÅ×ç?¥xõw:?üˆ–_ö»ÿÑVÕÕ[†2Œ3Œèáã{ièÎܳWŠ*Òæ‹¾Ñd^8Ö'™"ŠÒÖI]‚¢$nKÀnäÓ®¼i­Y]Íkqkf“C#G"íc†d6:Š­àØ."¾—WƒM—P}?cGF_tŒÀ@HÁºUök➎Ñjðk± M¶¡î.Œ¥$Ur pyÈn˜®a°QÅ*š³O_=¾ãè] "Ä*.šÕ~;Ûî2á=Õ?çÞÏþøoþ*¯%ü¾-ðåä7ꑬvì¦A$¤Ãœæ¸Šë|'ÿ MWþ¾m¿ô«Ïâ|†SZ­(%$•ŸÍYü#€Ëjâ°«’¤Ri­Ö¨Ž?ÛË Ó!ºháÈÛ— “ÛÔÿœT_ð‹XÿÏ[ûé½SÃVÅ,ÛNšÒeŠúØ»ÜÏäÝŒcazGSÇ^ZËcy-¬ÃDÅOÜg±ê+ò ñ­J”*s=wò}¾ãóŒOqêýn~öþO¶Ý­øœçü"Ö?óÖãþú_ð¨u9ÛÁz$—ÚpË5ÌP°¸ù” ²6ãžtUÌ|Aÿ‘Iëú/ý-z<3ŒÍ¨añô$ìÓÙèÌðWb1¥['ºùÚg¼Q¬jPiÚv›eqw;lŽ$òOý÷€É$ð$ñU?áhkóë§ÿß·ÿâëSáDwEô~#MçPCx¶;¡¶y¼˜ÙIšQ±X«.aÇ÷•ÝFy+Gâç†eð÷Ž®çÚÓSf¼†FÉù˜æEÎÈbx²ää×ìáìãžáak]>ín·è­øŸUõü_/7;!ÿ…¡­ÿÏ®Ÿÿ~ßÿ‹¯Aø?ã-GÄ^-º´»†Õ#K”•È’1ÝšðŠõ_€?ò=ߨ2Oýi™ð¾O…ÂT¯CÎ*鮄¬v"o–Sm3èÚ(¢¾Ô©ª»G£ß:1W[y ²œvžEywöÆ©ÿA+Ïûþßã^Ÿ¬Èÿþ½¤ÿÐMy*#Hꈥˆ ª2Iôóôç°Qmh|oT©ÔÔZtõ5Vã\}2MAoï ºH"b'brF}x9>¢ªÿljŸô¼ÿ¿íþ5ØiP­í…Þ„ï[ˆ—Éx¦Gà nlÎ7üà uÁǸY¡{yä†UÛ$lQ†s‚ y¸ªu(ÂRv{ïºßåÛ¹äãiVÃÓ§R3•š×Wñ'¯ËkwF…¶¯©4¬£vG–ç™›û§Þ¡þØÕ?è%yÿÛüj_õÍÿ\äÿÐ –ÿjºH‹lS–wÆv¨fÇ|N=«•U«$’“»}ý%Z¼’JNí÷ô5î·-t‹]FMFðGpÄó˜ÂNO|ôžk;ûcTÿ •çýÿoñ®ºËÉ×ô«ý4ÜÛHÁŒ¶Ê…¿r3ò¯ÌƒåŒŒðqÇÃ:4nÈêUÔÊÃC]xÈÊ’„éɸÉwê´ævæ ”¦Üd·¿U£ÿ?™¯¥jºŒšÅŠ=ýÓ#\FZf ÃÍUMWV‘Õþõˆ «3’O æ›£ÿÈnÃþ¾cÿÐ…?JEW–êIR%‰J£¸80!~è$‚À㪎™Íc Ôœb¹žï¯’0§R¬ãó½ß^–EÍ]õ­ô[M©Ý1(®glŽqϨ#ŸJ¡ý±ªÐJóþÿ·ø×M­ÛkÃVúœRG=ͰÛ3ÅŸ˜w8Ú6kŒ­1ªtjû’|¯U¯Fk˜ª˜zß»›ä’¼uèÿËbïöÆ©ÿA+Ïûþßã^]âïx–ÛÆšìx‡Vе „Ž8ïdUE0ØÕè•ä^4ÿ‘ïÄ?ö¹ÿÑ­_¢øb•jø•WÞ²Žúõ}Ï[‡ªÎJ¯4›øSK@Ö3ø[ûÆM¨@¸³Õ·\/?v\4rIêCgóàt¯Òã^‡×þ¬á5¦Ší­×Ýù3é¬ùyŽSþOÐͬÿà|¿üUløŸÅÞ%·Õ`H|C«F‡O±r©{ šÖ&cÃu$’OrI®*·ÍîX˨ÛÏæ#IÊžH.±²aT,¡‹®<ÖS³æ'Ÿø­á/øE|e?ÙáÙ§_fæÛjáW'çŒ`6·E”ÏZóèU¥,l¨N •«ÃMítúkÑ«ibš|·LÃÿ„ÓÅ_ô3k?ø/ÿ[:׋¼K•á×Ä:²<º{¼Œ·²íö«…Éù¹8Uôv®*º=FÎ}F×Â6±ù—76&“ nv½¸dð9#­wÖÃÐŒ ÜWì¿•’›Ô݈øò_\x·þ]dYÅr°7²‚Èx2XeC•N3’[¦Þy¿øM÷H»³Hí ‰$ŽDhÐ+È«$!C9v6Öüß3Wˆë:MÞƒ¬ÝéWɶæÖSàdTŒqÈ ×_RjÕ(Ö¦”•š\¶÷ZÓuºwL©¦’i—¿á4ñWý ÚÏþËÿÅW¢h~"×&øJ÷²k:ƒÝÿn˜¼öºrû<€vîÎqžqÓ5ãõéÞÿ’0ÿö0Ÿý'¦i‡£p´ĺ#»)÷±O¸ÂOâújø'øÖ†¥{âÝ*ÞÂ{½cSXï ÂÂîB “Ó9ÆqƒŽÁ†qÒ°¬l俽ŠÖ"ªÒ6 ¹Â êYe$žÀ^¡âñOÞ Û4‡|y3Ç"´CG*ÍŒsÎT€0Eyª”¨T§fìô]vüw>Ï:tjA8«7¯éøžyÿ ?ˆ?è9©ÿà\ŸãZzgˆµÉ4ýiŸYÔ£³VB×NJŸ>‘Ï¡5ËÖ®“ÿ Ýwþ¼WÿJ!®šÔ)r¯unºy£¢­*|¿ ÝtóCâñˆç™"‹YÕd•Ø*"]HKÀg“Z¾%›Å>ÕE…Ö¿¨;’Ut»|0#œ|ÙÀ`ÜŒàf±48Yï^q:Û¥´M+\1lB~ê7Ê g{&0 
ç¶kÐþö( dØWB†!²A$r{æ¯k7Px¦ößû:ÞàNˆD…Óƒœü¤ôÉè2sŽNs,[9xˆä`†PÊÃЃ{{^Tq’iÓŸÂíuéÚû$qòq•:Ÿšºôí}M3O’FÖàÉjÛ²rç;H\:WÎ0–xÌèï`dHÜ#2ç‚Ç|zú;LÔî§Õmb‘£+-ärH|” Í¿9ÝŒ÷=út¯›kõ¯ =›§‰ä½½Ïý¸ú,ÓöRöwµúÿóª_ÚAñ?ZØ^Ãþ’×rÛ‹á¹äff 8Œa@*A$dgž6oÁø­â»bTÒ¢ /Úby&™²±¬aÌA™‚ô'¢7$íSç•fÏP½Óäó,¯.-Ÿ!·C+!Èèrjý¦*Õ(é4¹SwvWÚ×׿{Ê]ÅÆÒí?á]U.æ?iío%¸Tl«11¾ò_h ¸móÔgÒoïíôïë“ÜÛKp=Ü;c˜FFòèNJ·f=ºãè|ªmBöæÚ+iï.%‚(¤•™P à('©éêkÑ|Oÿ#f³ÿ_ÓÿèÆ®\m7)Æ5î¥÷^:icèør*S©µ‘§á?ÇáMFòæ+)çŠuصÖУ9¾O™‡@p:·ñR}ͤ]kk¶Œ°Z£ÆÌ±y›70ܬqÚS’§!F «ßÞE“Üé"ÈBðw> ¨®)aùgí)i'kïª_ÖçÔ:6Ÿ==µüÒ.ëš:écåÍ,±ÞZ-ÒbT`¬ÌB³Bƒ×¿jî-®VÎëK¹xÌ‚KGÚnH…ç½y´ÓÍpáç–IX ìXãñ¯A›ýU—ýxÚÿ脯†ãç8`(¹;µ=þLø^?s§—Ñ“z©þŒ¿¥ê±i:¯Û!‚cR¢#8ä ÈïŒÞ”°X®³q4- .0›ÐÍœ"T‡Žs×*¤ŠâkvÝ ²FrQˆéô¯Ê¡^éB¦±NöÑ”ÓÄÝ*uu‚mÙYjüíøßéÓiÆÝgR²K”£ Ëü?Ž+Í~(ÿÈSEÿ°`ÿÒ‰ëÐ帚p¢Yd(Âïbqô¯<ø£ÿ!MþÁƒÿJ'¯ºðéÅç‘iÈÿöÓ×Èœ.~ÍYrþ±(ÇâÛ+_éZýž}±Žh´t¢(Ö5ÄCª€Ãp,8Ï·™ìζ_Ì–ÚåÒ5ìÖÚdQÈ­åb0áRà•Bë¼cqãicGÿ‘ËþÂwú*Ú¹)5}Ja —Q»q &é˜ïB€yçå}Öèÿò"YØNïÿE[W5xÖŒ#í¤ž«eo²ïøêzÙ%¾½ yþLÒ}BÍü?žm'ûD2É*Î.Â_`9M™ÆØÇñuÉö­éükouà¨ü9q¥Ë/•ÝIt‘ÁÈ à»ŒýÞ3Þ¸ú+‚xZS·2ÙÝj÷ûÿ ·–œ­Ì¶wÝîu່¦†9nàL,w!š.›@Þg. Џm£q‘óUí×ì6Úí¤ÿql ß(œà‰æ¹C©êno®NÜíÌÍÆqœsßò•ÒøbY'Òu‰e‘¤•îíÙÝÎK³Iîkçx¢5–M_ÚI=Oï#çx¢5VM_§¢éýätƒU‰5åÔâ‚dýá•£óÆKI¶ð¼ãcB$%ZER7üÃ’ŒŒd‚?/;NŸXÿ%ÿý{Iÿ šò«;“gyÊÆŽÑ0uWÎ2:t#¿5êºÇü/ÿëÚOýב×Ëgòq­M®ß©ñ|O'ô¤·Kõ/Ùj§êBúÞÖu"åW#›>½IëWmãŸÅ~ Vx㌰ 9Œ‘ò y'œ`~^æ°éUаe$rí^,+µhOXÞöØùêx¦’…Ma{µ¶¿wô1k6JËp~Ñé +*Œ`uèÜç½Rµ»šÎS$%(wÆ®0x<0=¸§ÛÍ+¶Æ‘Ù7–$ÕZSžÒ†ŸðÈ™ÔZJž–ýÿ‡.Ùêך|òÍk"E$¿xˆ“¦sÇØ{zUÍ>Ûûjæúæîâ*ÄîŠÆe}§cŒdŸÏ©5E®ÓJ~ôWKèñ2M*—”V¶¾‡AýŸŸ®h‰ÑO½£g–'ܬÞiôÃëXñ^Ï ¬–ÉåùR°h•ŽqŒäŒŒdã2j]þCvõóþ„*•]Jžê”.¯òF•jûªT×.¯Dü¢hZëš…•™´‚dX;„6zç#ŸÆ­Zið.Œ/Ùa¼™§-„„2‚ €A$¶8^¼œbÑS í[ŸÞ²Òý>û¯‘4ñ-5í=ä••ÞÞ‰Ý|­bΠ°¦¡:[•1+•RŸt㌎O·&¼gÆŸò=ø‡þÂw?ú5«×kȼiÿ#߈ì'sÿ£Z¿Qð¹óbqNÖÒ?›=þ—3­$­v¿öâ G[Ž•§Ø¶™e ±ˆCÄF_1“s± ¹ÊòÎÌp£žÐk5OxzËÂòiZdvдIh ‡Œ¨Ø¸-!å%~lõÏ^kŠ¢¿Z–Œœ[ZÅÝo£}w>—™Åïƒ-4¶‹­ÅzÖÛC5ÅÜ3Ö%*žd¡š3½—U\Çjƒ»ƒ‡âÏù [ÿØ3OÿÒHk2BöÝ™¡¼¸ˆ°Ub’²ä(À°àVŸ‹?ä1oÿ`Í?ÿI!¬hÓ© Ñö’»å·ÝËwn—i­5j¥Õ­Íزim¶ùe,`@B… ¬uTÀ€1ŠÜok5];Iñ½e °”°»º†–´–;‚©ä» ÛžÄqÔVÒÂÒirÅ&¯Êì½Ûö3=ïÂ}§ü&Ž!……ÌÑÚážH$*ÜÇ&Å—FÉAËl^¬Às÷z•Ö‘„5 'Dº·ÓÝâw‰d ßk¹ÁÚÀŒŽ ãƒ‚9ÌVæ½ÿ  Ø1ÿô®æ¹ÖPqYsÝõÿ_Žÿ1ß°Í?Å:®•®6³a%µµù]Ⲅ* 
v¦ÍªHÈò}NzÖ_ˆZ®µªëÚ­‹^Áa+GÏ»ÝJ!,ŒmM¡™@  #…¢·«…Œ“tíÚÜÖWK·§ÌJ]ÎóÄ^±Ó¾hÚœ:[ÛÞÌЋ‰¦iK6õ‘Ô«cÊuep¸hö…bÅŽ4¼?ÿ$aÿìa?úN+ÌkÓ¼?ÿ$aÿìa?úN+ÎÆÑ•*1S—5æŸßÓw¢èzYK¾6™_KÔN—tó‹X. Dñl›~u*ßu”çi#¯\¯áÏÞø].—O³²&åÃ;J®Ç8QóŽO¿<“ÅstW=\=:©©«§ú{RŒ*&¦®™ÐXørãTÐ5y-ö[Zñp’~}ÊX`äíT$’O§'®ÜéÖvuä–LÆ+½;’­8—a7q»‚¯#hÈÇ#œW+²DÛ£‘ñÊœt9¨ð­]6Y&±×ä–F‘ÍŠe˜äŸôˆ;Ö5)ÔO™ËK¯ºëOÖæ5!4îå¥×é§ër•¦£seou!Ô~\ÁáG%sœÀ•çŒrì*þŸâÍkKÓM³¹Š;7ݾ#mÝ×vå%²8ç3Œá ¯ÿ„·þœ¿ò/ÿZ½ãÅ?ò(k_õá?þ‹jùмLÖ…:“‹šè}IÃY^qNU1ô¹Ü]—½%§ýºÑÒÿÂ[ÿN_ùÿ­Gü%¿ôåÿ‘úÕ¡á.¹á]kQrÆê(wYD€³1RImƒ¨m¥äŒïà•ÅW,(¤Üwóg§GøZµIÓ†X;?~§kÿ7õc·ð÷‰þÑâ]*±íó/!Lù¹Æ\JÁÿ…QÿQ¯ü•ÿìéžÿ‘¿Eÿ¯ø?ôb×¥Ö´ó¼~G eÕ99Þº'{Zß}ÞÇ6/…òœOg†¥Êš¿Å'®½Û<ßþGýF¿òWÿ³£þGýF¿òWÿ³¯e‹J…ü9<ƒ x…f`ʦ2ö*KqôíXUÕ_øš‚‹ž#âWø!÷|;÷ìpS˰5/hlí»ÿ3ÍÿáTÔkÿ%û:íµ¯ý¯^Ôn´vy×RÉ·ÈÎ2Äã;«J·î‘¤Õ'Y¦`©-^¦KÅ™Æ9ÎXšÜÎ6·»½ï´WdeˆŠËÚx_wš÷ëµ»Üà?á^ÿÔSÿ%ÿû*?á^ÿÔSÿ%ÿû*ô­Nέ-g¶!£+±œtc×8ëž¿Lb²ëÞþÛÇÿÏÏÁ‘‡ö®3ùÿþGÿ ÷þ¢Ÿù/ÿÙT"Ö±uq¦ýŸÎû5­´~fý»±sŒ~uÞ×–øÿþG+¿úåoÿ¢¼L÷[B0Ä>dú.µŽü·C>¬ðÙœ}¤"œ’»Ž·J÷žÍùü%¿ôåÿ‘úÔÂ[ÿN_ùÿ­[z÷„¬¬þZ_Zm’þÆå£ÔŽÖFä’Q‚(ÁÁ°lן×ÌÔËéAÙÇñg§„àŽÅAΞf×ÇS§ý½×Ft¿ð–ÿÓ—þEÿëU­gŸð™ÙèÚ¯Û~Çþ„Ñy^W™Òy¹ÎG¯¥rêzüŠZ7ýp“ÿGËWG_(¾+.IÚײz6¯¤®¿q¼’åñL-VÝŸ½7¥›ë'Õ#…ÿ…QÿQ¯ü•ÿìèÿ…QÿQ¯ü•ÿìëØô]6í>øà¦!äsÆ}ö‚}ýëºêñÇÓ§ ¤¯orø«žd2Ü ¥(¨j¼ßùžoÿ £þ£_ù+ÿÙ×O§|=ò3¹!ÆîÛ?ZèkV×þAQ×y?ô®¬¯Œó¼f%S¯^ëWðÁkòŠ&¾–>Û­%×W¿­ÑŽÿ¨§þKÿöT½ÿ¨§þKÿöUèºM¼S^+O·ÊR£1Îè,wª÷[¹aç ÜdóŽß¥}OöÞ?þ~~ üŽOí\góþ üŽ þïýE?ò_ÿ²©.´ÿøC¼5u7›ö¿´^@˜ÛåíÂL}Nk²®kâüŠþ¿âÿÑr×g™b±8IÑ­+Å­U—èðµ%™ÖŽ ïS›³[]oº³û™ÉÂ[ÿN_ùÿ­Gü%¿ôåÿ‘úÕ·iá+)þj)¶MnŽòDRE Pʧ'•1³IÆ2p9+ŠóúøÉåô¡kÇ6{xná|Kš†ávøêvNÿÏ£êt¿ð–ÿÓ—þEÿëTü'LÖ¾ÅäÏÛñægåqŒqýê嫯ð/Þ¿úGÿ³QNrËåõ¬/»R§½Ÿ£ºûÑxžÈp”z-%³æ›ën²±“ÿ £þ£_ù+ÿÙÑÿ £þ£_ù+ÿÙ×±ø{O·¼–Oµ‡FH”õ-¸píß=«XÞ^)« ô#­wÔã~&…(Õx%±ýÞ½<µ ÿ„oÅ7ŸÚißdñlòvc.‡9Ü»W«Â?ò—þ¸ý ia¸Ó<ÆÕŽ_šÑ®X+¯UňËp´©Jpšóævu—¯ø{LñF–Úf±“Ù³‡h–y"GMÛ3Îõµ(¯xðÊÑ‹?ÜÇfXmYTÌÆR@CÅòXñÉlç¾kÌ¿µn?çŸþÅÿÄ×§ëò¿ÿ¯i?ô^G_1ŸÔœjC•ÛCãxž¬áVŸ+kGù—×R»`Åaµ!X‹(¸ÆOËê@üi¿Ú·óÎÏÿâÿâk£ðÜ6eq¤Ë…¹¿·ó70ÎÞHQ´ž aǨ>À×%4/o<Ê»dŠ0ÎpAÁ¯&´kS§ œíóyìû}Öï±ü©¿Ú·óÎÏÿâÿâk¦°³‚÷÷š$J>ÝYÎìeœ€HŸ”õgÓñ"¸ÊßÔcs·Ì»õ[¯‘ÓŠ…|‘jç-k<˜ÉÆO \Unx³þCÿö Óÿô’ÞXj>Þ+‘lú.ñ3±$&ÕîgŽ,t™f•‚Gz£3±8I'µGÿ f£ÿ>Ú7þ lÿøÕtŸ .´Ý_¶×õaˆ¾Ò¶6ìϰ#È­¾l’TU‡a0n ›ñ/Ãmá_Ú„Dµ¸cuj#PŠ#rHP 
œ!—·ÝÎ"°L;ż3¦–—NË^éz]~=‡gËÍs7þÍGþ}´oüÙÿñªÙÖ¼O•á×[}$™t÷vݤZ°íW€cÂŒÀÀÎORIâ«s^ÿ?†?ìÿúWs[ÕÃQ燸·ì¿•‰IêI‰µyRWŽÇIt‰wÈˡڋ¹?ºàe”gÔÞ£ÿ„³QÿŸmÿ¶üj½+á–‘>‡¨øBódz¦¿§›¡, 7–u<²yeHÁ*ùà('È/lçÓ¯î,n£òîm¥heLƒµÔFGzVj˜zÕêQöi8ÚÚ-Së÷¦†ÓI;š¿ð–j?óí£à–ÏÿW¢hzíÜ¿ ^í¡ÓÄŸÛ¦=«§[ªcÈ;mÏûXÎ8Î+ÇëÓ¼?ÿ$aÿìa?úN+<ÓF4áh/‰tG~Sïc Ÿpÿ„†÷þxiŸø+¶ÿãt÷×5Ö6{]=VEÜ…´«pdŒÝò2úƒYQE$ó$QFÒJìKÀw5èÞ2³´Ö<¥ë:_–cÓاi` …lç€@äâLñÍy•åFHAÁZNײӷÞô>Þ«¥Np‹Š÷¯Û·ÞqðÞÿÏ 3ÿvßün´ôÍvíôýiŒ:x)f¬6éÖàçÂ99ô ÉñOüŠ×ýxOÿ¢Ú¾eŠ6šd‰J†v ¸UÉõ'sÅ}5âŸù5¯úðŸÿEµ|Å^VañDûþþ[w_‘éÚ¯¦øÆ– þæ8 ˜»R·h,³#o˹òûL›†p20§Å…üMxÚ Öqa"›c½…VYC+d€ 3€£ÐŒdƒŽ>­éwßÙš¥µðÌ{i±®ìëÊçÛpdddu®²”y$´¿Ü{‘ËåB«ÄR—½ËfžÒ{ß}ÍÍ#BÔ´x~Këo)&ÔÖ4;Õ²Ñ̨ã‚z7ý³]õºÄ÷1¬îR"Ã{ wìk‡Ñuk{Ý_ö‚ÞëíI«%Ä·3] <ב£q°’ŠFIêsœäv•âç1ön:­öÓÈÌÝYM{]%g·«·WÐèt½j}IÞd†(ä\4‘™1ÇO”“Ó§AŠÎ–À\ÞÌ4æI Þ`Ÿ{8_˜‚O~ŸSZܵ¬¦TÈ“c*08ÚHÆ"}+Ïúç¶ŒiW^êmÝhõßËÏo™ã{FçOv¾BIo$QG+Ù&v•pÝ1px<Ž té§w#2†Wm³‚w{sÀÉúâ°î®!™"H"x‘Ýixç ä÷ü:jßÈBçþº·ó5ô<8 ¥YAÝ{¿­ú.¾G™š¹5mõý KëI4é­.vÀ§îmÞØ÷ïßžµš-&h„€&ÃÈ%Ôq¹Á9Æ{Ôm·é/ëüަÃR·Óïm‚š%P¯(Þ‘–ÈèyéÇ@=3Pjq[ÞjÎÚt«"Í™9M§l–Ç¡5—J»C©pJç Zñ¥Œ”éû¥Ë{ök¦ÓºgÍ* 3ö‘nö·õý"ylæ…\Éå ¸È©' @$`ŽE]µÿT_õÞOý)—:Ä·V’C(g29s½ƒ*çäÊúu#õ§ÚÿÈ*/úï'þ‚•êäÑ¢±ëض՞ç=Í៴VwEÄ¸Ž x‚,nÙÞÙ,lœrè1ùš·«ÜÚÞ¤SE óÀÃ&§×Ž:É¢¾Ôð cpµ•$æ.1œœàu}k—ñºÄþnÇ jP  ì—'¡íí]}¾¦ð,hùQà…Y1’ 'èFÑÐÁåu Ú÷^¿Ã×Üiñƒ:I#ù e€ÚL¤t,9çƒXUoK¾þÌÕ-¯„~cÛH%w`^W>Û€Èã###¨ñ^t£-Òã€T*J½ùšJÍÝ;mç~›Ùv-ê¾ÕtDw¿‚(ü¹9nc‘‘™K(eV$d)##µløï_ý#ÿÙ«ëX¸ºÒâ±w” žK©™å,f™ð ¢¨ÆrrXçæÀÞð/Þ¿úGÿ³W7—ØÏ—±Ïmõ)ûk_ËÕy³Ò"Ô!²šÓÉHeX°wæE`N7gr:ÎjMq­5F)tù„²Í„dÚËót'AøV-à¼låIÑ”W+iú[·ËMn|zÃÅMM7uýkÿÅÔÒo$p©¶T6å‘Jà£æÎ2H#ÍixGþB²ÿ×ÿ¡-U·Ö7#ÀÎ6,J¾`ÀŒ m9Röw£Û§|Ö>ž D‘¶{dЂ;‚8 õ¯)âç8û9ëïÿ ØðÞ:sƒ¥SX·wÝz_m ìÉm ÎòÂÞX(U·ß‰™X`‚ïTì$‚+ø%ºRÐ#†upãÏJ¸šµÝÈ’L%6ÎØ#Sò¡ ÈPx}8¬ºŠ’¦š•-¯Ôг¥QZ&÷ÿ‡íc¡ÒüDö:¯›=ýýÍ R6¸ÉsBÄ{ƒÛÞªÞ‹=WW¹º¶w·µ#Θʙ)’Àç$N½€ÍdSáš[yD°ÈñȽ‚?Uõ¹J Mc{ÿZíåÿSÆÎtÕ*ºÅ;õ¿ß{ÛÊÿަÚh×F½¦‰ÁK¥Ù9Pã‚FNzvªeÌÆw–iá•,RB™àIÎáŽvcø®•,’ë¶-#³“tŒKä–?ZÏ¢U#Jš²»Ñú/Aά#iÒ•Ý“wèºètú?‰¢‚ÆêßV’òífùBðØR0~bÀóéÛõ•g¦CrI.ŒPfAò·;lPÌqœ 
)Ï_ašÍ©b¹ž‘"žHÒA‰€ãÐúõ4jÀñ…2,¦0Û6ËÀ’xª^,ÿÅ¿ýƒ4ÿý$†§_këjÖÍ5”‘>|Á6m!”’¥™ËFK±1¡%‰$¨$’*ÈbßþÁšþ’CSAbeíÚÙíê¼½;ƒµ´/KâoìýL¶ðþ·¬ØÜÃÛ¸a_³Å,…Œ›’RY€dA•ÉTŒ®¯Æ^/Ò>"ézg¡/ˆ"ž8îæ‰%.>BÈBî`ÐãåTSž]JSUñŶž›¾öµ×s»XìÇ‚,$Ó&»·×–áÖÀÞC@ˆgÚemH¬QL-ó-€IAÆìÍ\DÚw…îé ÓØHñ vUûeÆHR@'²3ê*¥ç‰üA¨Ú½­ö¹©ÜÛIðÏw#£`ädƒÈð©õïùøcþÁÿ¥w4¡ ñ”}´¯ïiåî¿%ù×C§·ø„ÚwŽ­ïìuÍq|>“™M‰ŒŠ<¶ H¼Ò›BáAÈÛÔ´fÞi>=ñ­îµc4Ú~ž¶ÑÍ}-ÜYt ÉUQ›q;£d žp2ÕÁÔöw·zuÒ]XÝMmsvM…r0pG#‚GãB˩Ӛ«KI¨òßM·×MüõôaÎÚ³:¯ø<5¤³ª}¢æ;ϳOjaTh2dØ\o, ,{‡Ë´†Èf'wÃÿòFþÆÿ¤â¸=CÄ:Ö­Á©k…ä*ÛÖ;›—‘C`Œá‰Á<û×yáÿù#ÿc ÿÒq\˜¸V† ¼¹¥Î¿®‡£”Ûë°°Í êÎËRûEãΊ±H#h"Ë!RªÃ,¸*NàAÎT}G_á¯ZÙ隈n5MN¢QP€Ä!Rç/¸dƒŒƒÉ¯>¢¹1:x„ÕO/•µÑô>涯Ïåøv5­´ ‹ë;›ÛYaû,F]†y9%®öÚ™$¤qž¦¯Úè÷^¾Ôdxš ëäí'?%͸l‚8äãð>ÙÇ´Õ¯lm¥·‚UJ®¤7Óî<s¡ëójw“Ï»yjþW §Ìd øa»œztÀQW[N³¼·º~jÝŠ«……WyotþãvÃÃR_ؤ‰r¢êuG·· ÈÓw3Î7@Öïƒ4øtÿè& ¯µEq²‰e;eB’2‡€O ®5/ï#²’É.ç[IsÀ²9+Оä+ªðýæ¡ñK–öî{™BÊ¡æ¹ÊsŒžÜŸÎ¹ñ‘¬¨Ur––—ÝoëþLqQ«ìj6ô´¿/ëúÛÝ袊üôø³3Äq4þÕ¡RIe2‚zd¡óßü"×ßóÖßþúo抱ù_ÿ×´Ÿú ¯#¯›Ï1¥R =;ÅyŽI%OÒRÕÝ\äá¾ÿž¶ÿ÷Ó…ð‹_Ï[ûé¿Â»í'G¸Ö%™ ê£.Iü+ž™'×Üö¬úñÞ+©½žß#š^$q`¦ÜlïoukmÌOxnòi34°Žö 1΃éQÿÂÐÑ?ç×Pÿ¿iÿÅ×Y£ÿÈnÃþ¾cÿÐ…|ó_uÁ¹ˆ)Öxôß³jÖvÞ÷ü‘߀âüÏ2‹©ˆjëMZÿ…¡¢Ï®¡ÿ~Óÿ‹£þ†‰ÿ>º‡ýûOþ.¹o ø x7Ä>!ó|¨ô؉€6ÊêÈR1A’㑃\u}u/xv¬ç)7gï=ís½ç8µÛî=kþ†‰ÿ>º‡ýûOþ.» _ƺm¦µm$e⸒6*‹‚CÇÍ_:×§xŸþFÍgþ¿§ÿÑ[ǃ2¼¾¢/y;Ýßk[óg¥–ÉæS’Ä}•¥´Üëÿá=ÒÿçÞóþø_þ*øOt¿ù÷¼ÿ¾ÿЬ WÂSi^ÒõÉ$ùï$!â$|ªÃtxÆs¬NOñ(ǹªªY6_U9Bí]­û¥<¯Q^}7ìz'ü'º_üûÞß ÿÅV7Št‹c^mFÝâXn-í¤EÀ®®R½oõV_õãkÿ¢¾O0t²ü%9ÐÝÊÚëÑžãëpåb²ý%'Êï®–¿æ‘ÆÂ-}ÿ=mÿï¦ÿ ?á¾ÿž¶ÿ÷Ó…z*èn|3&®_SЦv“õÜG§C×"²+󺘌E;séuuè|Å_¸Š—/;ŠæW^êÙœü"×ßóÖßþúoð­Ë¿YxCKÑ´ÍB+‰gû#KºÝU—yqÔƒž=+J¸Š?òÑì?ô¢zú>ÀQÎñïŒÖ­é£ºhßǶgQÒÄ8Ù+«FÚíù6tð´4OùõÔ?ïÚñtÂÐÑ?ç×Pÿ¿iÿÅ×'áIâ/ x“ZùgJ€IB2ï“Éáàc’ÃÏ#_¢Óðó‡ªNPŠ•âìýçÚÿ©ë<絺ûZÿ…¡¢Ï®¡ÿ~Óÿ‹®ŽÃǺ\Þ·»[{ÁÝÏ9TˆŸâéóÖ¼ »þDK/û Ýÿè«jÓýDÊ03Z1•ïmeäά2®:¼hVø]öòLôOøOt¿ù÷¼ÿ¾ÿŠ£þÝ/þ}ï?ï…ÿâ«‹Ðô™µÍnÓM€í{‰6–àíQË6 Àã<â¥ñ.ýƒâ+Ý0>ô†O‘³’P€Ëž8#÷Ÿ÷ÂÿñU¯‹|/<6 ñ´°3À‚“1šàk­ðŸü5_úù¶ÿÐf¯+ˆ²¬>,­^’|É+kæ‘Å›SŽSƒž; ¥HY«ê·Kcþkïùëoÿ}7øQÿµ÷üõ·ÿ¾›ü+ÑtÝ ïô‹ûýûVÝrƒ³Ëg¿ ù’9àÖE~I]'V¼Ógdi­'x$hÉ*Y©# dzWê‹ÃÞu¥Ì’ms=Ÿü1íÿlâí{¯¸ôÿøZ'üúê÷í?øºìþøËNñ‰.--!ºIÑ¥&UP0f<ò+ç*õ_€?ò=ߨ2OýF3€ò| b¨F\ðWW•Äó\EUìäÕŸ‘ômQ_,ASU`º=󼄫gå<s^]öëúYÿßRÿñuéúÇü/ÿëÚOýב×Ìgóq© 
vì™ñ¼OQ­;[gÑ>¾f½²ý®ÖêâÍ£¶PÒ|ógã¿ê~€Õ_·[ÿÐ*Ïþú—ÿ‹®¯Ã€Ã&a AqyÈ&]âSœ»·“ F8+õ'‘Ôl_MÔg´å¢lýáÔÄ`×›ˆ¥*taU-ôz-÷íÛòg“Š¡:8zu’Zèýգ߷oÉ–m¯mÌ­.Ð~íú4¿Ý?íÔ?n·ÿ UŸýõ/ÿPÚÿ®oúç'þ€j8a{‰ã†%Ý$ŒFq’NrûY8¤’ß²òò8}´ÜRIoü«ËÈÛ[]Úš¯öU˜…d ‚ÓGBßë:gŒ÷ô¬ï·[ÿÐ*Ïþú—ÿ‹®³D–Òð]hæò ­%ˆ,H¡•U° €dýüdàäóÉ®6òÖ[Émf’&*x8>ã=Q]xª~Μ*S³OG¢z¯ó[øÚ^Ê•:´¬ÓÑéï/EÕj-*òÖ,Ti¶¨MÄ`2´¹0äeñTþÝoÿ@«?ûê_þ.þCvõóþ„(Ò¢G¾YfØ ƒ˼eHœBð3œqXFr”b´Õ¾‹ËÈåIÎж­ô]—‘£©ÚÿeEhóéVéïÆéSÝHó3 çëŽÕöëúYÿßRÿñuÔ¼?ðxQ“í1ÝßYVH÷aèAPÜŽ=ÊŽz×[ccì¥SK–I5¢ùôîtföŒ©%É$šÒ/ÕmÜ»öëúYÿßRÿñuåÞ.Ö¬"ñ¦»øcI™ÓP¸V–Inƒ97ÌvÌO^½¼‹ÆŸò=ø‡þÂw?ú5«ôO Wµ¯‰æí´êûXõxz£’«{}ž‰wìOe©Zê7öö6¾Ѥ¹¹•a‰<û±¹Ø€MÆ$u©õ™àÐu›½*ûÁú2ÜÚÊc|Izc£ Î R0AÇ ƒSø^ir\ø†ÌÙ%å¶Ø¬ åÌp#JYLŸëU”E½XXyÈ@Ü:¿š<ÚoŒ¬ÓÚ¤H“êØ“fPå]Ê |¿/î󓺿Kž"ÇGÛåi¯ŠWæVvßkmç¡ôÉ>[žý½§Ð¥£ßÛÏþ?[>'Ö¬#Õ`WðÆ“):}‹nynÚÄBñ0àwÀ$äž*·Yp à©…[*FƯ Åc®[ø—Mún®¡ÚHpcc9FÜ:áÉ,wšóÕ{cý„¯É%h»ËâŽý«orçý½§Ð¥£ßÛÏþ?[:Öµ`šW‡Y¼1¤¸}=ÙU¥ºÄcíWhÄÀã žrrO8ÀUnkßòðÇýƒÿJîkЫ‡‡<5{ÿ4¿•ù’žçI¡é'^ðÞ·­ÛxGFÚTAÈ2^4õ`¤Üò¦XóŸº0wqÍÿoißô)hß÷öóÿתü>¾³Ðõë Ýjºc[MlÖWÚt« g¼,ìÛ³˜n0q&*œŸ•G–xÏÃáOß院†MÐ;–‰†ä$àdà€p1q\:þ×RKÛIGYk»÷׿T•¢šý½§Ð¥£ßÛÏþ?^‰¡êvð•ç] OŽ?íÒŸgW¸ÙŸ ù2–Ïo½ŒvÏ5ãõéÞÿ’0ÿö0Ÿý'¦iB*œ,ßĺ¿ó;²qO¸kYн¦ßËŸþ=Z”ÙVöÝøgLXï ÂÂK’ “Ó>n3Œv 3Ž•…cg%ýìV±V‘°]ÎRÌ{($öšõØ¿Š~-ðH~Ù¤;ãÉž9¢ÈÊ9Pvlcžr¤‚+ÈÅU ”âÛ³vz¾»uï¹öxŠ‘£R ½×WòëÜóÏík/ú´ÏûùsÿÇ«OLÔíOֈдô f¤…{Œ8óá9—§9ã vÈ<½jé?ò ×ëÅô¢é­F<«Wºêû¯3¢­(òõÝu}טøµiæH¢ðÖŸ$®ÁQîIbx7“Z¾%´Ã:¨°ºÐ4§cJ®’\áã÷ÙÀ`ÜŒàf±48Yï^q:Û¥´M+\1lB~ê7Ê g{&0 ç¶kÐf%‰ç,¿»n›¤#Û‘\-u_ ÿä~Ó?í¯þŠzÛJ+ Që𾯷©®.šXy½v}_oSß袊üèø‚–±ÿ Kÿúö“ÿA5äuëšÇü/ÿëÚOýב×Êññaè|WÿŸ£üΛIÔ ÐæµF¸’&`¯pÕÕÀ#'p<.; n â«ø¯H]/SB¡mîtÿéÈ?Ž;VL·÷“¼o-ÜòkÚvy¨M¨jid'X¡ Ī7î2#¤nr¥W÷lI»æÈ\ºx9à§,];ÉòÙÅ_Þwßwg«è{üÊK•œ{Æ–ú·Äb·štŽêêy#…IgT,Ûx鸀 ö,8= |‘xËJ‡^Kàó­Ä¶gÏÿD`$ÂÆó±rîb  )féüOÿ#f³ÿ_ÓÿèÆ£)Õ’·»¤’ß¼uéýu>‡‡bÜê$õ²=+·vþ+ƒ]ðõõާ RHÓËçLC#>J‚vß0R$7¾|–êÞK;¹­¥–7„Š» mRÇþÑßRŠ)å“~Ð’.3*ed2®¦ýû·º2— 7*zÆMi®ß_ë©ô1¢ðÒr†ªMi®ßS¯M¶´kû­.Õ2 ¶–‹»¶%2qì9ü+ñF©§saåÎ×/mfK;Hî%pÌÅpwB@àz‰¿ÕYׯþˆJøî=Ÿ6„§{û|™ñœ;åôe8ý½WÉ~qåíÖ5•ô qWG“"-ªœmx“œñÖ¹+ËYlo%µ˜bH˜©ààûŒö=EA[ž¾·°–wžäC¼*‚ ä²°Véòž<ô¯Ìý´q\´êifõÓmím:þgæ^Þ8ÎJU}Ö›÷´Ùëkh·ë~»uÀ|Qÿ¦‹ÿ`Áÿ¥שë3éÓ 1§„qAå²J¼ƒ¹ŽIÏ$äž8éôYñGþBš/ýƒþ”O_gáÔ3Çïî=¿íÓ¿$¦©ã'Ó÷w[o¯ðÔÞñf 
M¤kB]F$2¸m±ÈÒ¢¹c LþèüŒÁøç%k‹øŸàÕðwŠÞHÝt»¥ó­ ÛGFMÄrTöÉ;Jää×[ž¾ƒLñv›sq ¼VÒùÅçˆÈ„¨$)XÄÜ•Îà2+ö(àjaëK ]òûÊÏÞjí=Ý»Y'ä}w2k”îçGÿ‘ËþÂwú*ÚµuOhr§Š59¥kø£H$‚úígb°*„`cXæˆ1)™6¹Päîb3•£ÿȉeÿa;¿ýmJ®"u©ÅÎ:­üâßá·Èõ2El|5ïù3¨Ò`“Jðäš¼–:„±]HaóíŸÉòV7ƒo(Ãæ|xÁŒŒœœuÞ5ÓŠ<eâ¨-n"¼Šóc—%šœœƒ‚wùARIì”Ñ^]L•XÖR´“¾Û®Ûöê}ŒðŽUE+4ïòí¿â×xIKhº¢¨$››`ïòÍZú‡ˆ´‰n4ò—¶×ÚÁ$P¼‰2Á¬»U£,ªøTÜ‚‘ƒ–ªšd°Oÿ $¶²4–ï©DÑ;–%ùä[’q޼ú׃ĸ‰ÕÉ«óAÇEÿ¥/#ÁâjÒ©“WæƒZ/ý)Ö—tt¯A¦›KÀB›pwü²(f>fͽ2Xç<zÖ'‰t¤j…"R-¥âÉ'«ŸcõàŠÆ«ºDÉmªÛÏ$ˆ‹oË®á2CŒž2ÆsÚ¿–*5 ¨É[]š.Û/ÍŽKE5‡œmïhî½Ôô¶ÊëæŠUÌ|Aÿ‘Iëú/ý-zF­­‰àº‚;§ž9|´Y˜ˆÐÍÔ Øˆ,Bœûù¿Äù—þ¿¢ÿÑr׹´áO?ÃF¿½ú?S\ (RÇÂ0—2×õó~¿3;KšoøHÔçÒµƒöÙ£Ôî ¾ÌŠÊfHâ%¢pÙMïÔnYz2z_>[‹;/YZ\ÀnÖ5¿†u!ã%–̸!csŒíd’|fŠýíå²úÌq1¤›¾ÞO¦ú[úGÛóû¼¶ õ_€?ò=ߨ2OýtvÞ<ðìeω¡_&!h|ˆfj+£Ÿ)¿t3l3q“×w9ðþG»ïûIÿ£b® ~.®#/¯í)¸Yu¾¿z_Ó*1JjÌú6Š(¯ÌŽÒ–±ÿ Kÿúö“ÿA5äŠ@`HÐ÷¯[Ö?ä ÿ^Òè&¼Ž¾Wˆ?‹C⸧øÔýæ[7òH_ìŒN$ózÎsŒúþy.åñ·köè̼mo'åmƒ$ú眓ÓÒ±¨¯æ´nê÷k¹ó°ÄÎ.Òw‹wk¹Ò]hÂΩÒeòˆÛ6+#DÙàŽ>`3ž…±Û'ÚâKK„ž‘œ©*׊u¯úæÿ®rè ª«UI©Arïýt.½hÉ©Ó.¯üû/éàÔî­oMä s‘Ë 8Ç€õõ«Öw «jÆ}Y£•Dg~Y!fì0r °ÈêzècQS ó”Õïn—"ž&qiI·ÞÍèߦÇCöm>Û^ÓÒ\v¥Nñ€ÏËŽ‡'<óXÖ·³Ù‰>^$8x•òÎ>`{àþÒ¥Ñÿä7aÿ_1ÿèB©UÔ«t§Ë«ÛÑU¬ÜUJk—WµôÒ+Ô¿a¬ßéhég0ˆ9Ë~íI?‰«ztVSX^\Þ-³O–hƒH.H(N@GQéÁÅ¢¦ž"Q²—¼–ÉìE,T£e?y+Ù=•ü‰ïkr˪¨ VÜ7`nÁî3šñ¿È÷âû ÜÿèÖ¯]¯"ñ§ü~!ÿ°ÏþjýCÂç|F)ùGógÐpóæöÏÓÿn(¾¦ï£C¦5¼&(ey’O›xwØõÇ"4;z’kgþ½_þïøEvÛdíÛåùgvwïÝ»9Îÿ›Ó¶1ÅsWë³ÃR¹£{;¯'ÝyŸIvXÔ>%­Ö™5¯‡õ ¬UXêQ²NóÄTE¾4Â#f "|¸äž¸?ÈbßþÁšþ’CXu¹âÏù [ÿØ3OÿÒHk“ B­ÑU'Í£ü×›ïø"›MhŠ·ºíö¡¦ÚX\}˜Áh¡!Ùi:¨ÉÆõPÄe˜IÉ$œžkbøƒÄñiú¥¨Ùg‰âXãšÞ(!‡(%‘AEqÐf¹Z+ªXZ.ÞêºÕh´oªó3=GÄ'aÓõ›0–ìÖÞežÝJ *L¿îä'gf$.r :îQ±ËqÚ¼Ío§xRd^==DˆI—•`AÄkŸ­Í{þ@þÿ°cÿé]ÍrÓÃJ$g7+¾¿á}î6î2Oê²x†-y¤¶”M½f[(TÉ;Ê„ÚÍ’Nâ ÎxÞѵAã?›ßÍoy VŒ²n–+7#f4.Ã|Çî«uÀÄQ]0”åq(»Y4µKÉî¾BRg xÛDðÞ™ ÆÚ\Úd—ñÞyrÉev%Ião1•£_9Ù€ŠÁ×ïck’×¼?ÿ$aÿìa?úN+ÌkÓ¼?ÿ$aÿìa?úN+ÎÆQ•<#9¹>u«=,¥ßëØNÓGnÍÄCç]J·B?„‘øÖžƒâÍKÃbài¢ûABé»8Î'¶OçXtV5(Ó¨ššºgßN”&ššºgU¢h6Ú—†õ]FKID°ù æ,$e™rØW*As†û£,'}*=7·-,R[ê3Y?Ú-äãdU¹·Hn¹ùŽáÔuS\ujé?ò ×ëÅô¢å«Fj\Üús'oÂÛüý|´0©Jiósiu§áo×Ô«i¨ÜÙ[ÝAˆGu—0xQÉ\ç°%yÁã€{ ¿§ø³ZÒôÆÓlîbŽÍ÷oˆÛDÁ÷uݹIlŽ9Ït¬Z+ªtiOãŠ~¨ÞT©ÏâŠ#­ÓtÝO Å6£4\´êÏ,w¸„ʨËåïÜ$s±—o¹ã[ÃVÖß4°GBKy^X’ñnB>Ù†7¯t)ÇlמWUðßþGí3þÚÿè§® e 
F…Y¹·¤´ùmý|­×FQ£RNMé/Ëúÿ€{ýQ_ŸUÔ¡{*ò—t’@è£8É*@¯:ÿ„K\ÿŸüŠŸã^ŸEyøÌ¶–.JUVíoò<¼Ã)¡Ž’•VÕ»[õLóøDµÏùñÿÈ©þ4Â%®ÏþEOñ¯O¢¸ÿ°0ßÍ/½‘çÿªø?æ—Þ¿Èó­7Þ«g4¶{cŽtv>jÀžõã_ð§¼yÿ@/ü›ƒÿ‹¯ª¨¯¥È1SÈ£R8dŸ=¯Í®×ÚÖîz8<ªŽ.4Ûwïoò>Uÿ…=ãÏúäÜü]ð§¼yÿ@/ü›ƒÿ‹¯ª¨¯¡ÿ[ñßɹÿòGgÕâ|«ÿ {ÇŸôÿɸ?øºîõßxž÷Ä:Õ¾™¾®å’6óâR䃂ÙèkÛè¬*ñ>.¬”¥é~­¼üŽì"X)9SÖýÿ¤xü+ÿÐ'ÿ&"ÿâ¨ÿ…oâßúÿäÄ_üU{ýë+ùc÷?ó=/íÌGòÇñÿ3À?á[ø·þ?ù1ÿ]¤žÖš;P,¹KH#oÞ§ ±*°ë؃^—ExÙÖ"Y½(ÑÄh¢ïîú[­Ï:ÛcG¢‹¿»§Fºß¹æð‰kŸóãÿ‘Sühÿ„K\ÿŸüŠŸã^ŸE|çöù¥÷¯ò>kýWÁÿ4¾õþG˜Â%®ÏþEOñ®CÇ¿ üY­_é’éúOœXù2´D»_Ε±Ȩ̈ñ¯~¢½Œ’’ɱ_ZÃë+5ïjµô·nçf$Ãá&çM¶Ú¶¶ò}¼•áOxóþ€_ù7ÿGü)ïÐ ÿ&àÿâëêª+ë¿ÖüwòCîü‘èý^'Ê¿ð§¼yÿ@/ü›ƒÿ‹®¯MøoâÛ ÚØË¤í¹K뉙>ÑÂ4p9ÝŽJ7åô¯¢²«Å8Ê©)F:k³ÿ3£ /ªÕU¡ºîxü+ÿÐ'ÿ&"ÿâ¨ÿ…oâßúÿäÄ_üU{ý—úÇŠþXýÏüÏcûsü±üÌðøVþ-ÿ OþLEÿÅWEáïx†ÇJ¿†çOÙ$³Àè<èÎB¬ ôoö—ó¯]¢¸³Ö¶a…žªJ2Þ×¾÷êßäqf8Ú˜ü,ðµRQ–öß{õ¿äy‡ü"ZçüøÿäTÿ?á×?çÇÿ"§ø×§Ñ_!ý†þi}ëüÿUðÍ/½‘æð‰kŸóãÿ‘Sük Æ>ñ6«áÕµ²Ó<ÙÅÜrmóã_”$€œ–«ν²ŠîËrúYv.º-¹AÝ_o’üÍðü=…ÃÕUa)]wkü•áOxóþ€_ù7ÿGü)ïÐ ÿ&àÿâëêª+î?ÖüwòCîü‘ë}^'Ê¿ð§¼yÿ@/ü›ƒÿ‹¯@øAà?ø_Å—WÚΛöki,^>7Ë™# aXžŠ*öª+ŸÄؼUQœcii¢wüÇ1‹º (¢¾t؊溵šÝÉ *21^ Ž+ÿ„Kÿž÷Ÿ÷Úÿñ5ÓÑ\õ°´k´êFö9q,>!§Z Øæ?áÒÿç½çýö¿üMð‚ióÞóþû_þ&ºz+ìÜ'üûG?öFþ}#šÁllXOw’¥yuî1ýßzoü š_ü÷¼ÿ¾×ÿ‰®žŠ?³p›{4?ìŒ ­ìÑÌ ¥ÿÏ{Ïûíøš?áÒÿç½çýö¿üMtôQý›„ÿŸh_ÙùôŽvÛÁšu­Ô7 5Ñx]C2àsÏËQ ¥ÿÏ{Ïûíøšéè§ý…µ¹ÿ²pV·³G1ÿ&—ÿ=ï?ïµÿâhÿ„Kÿž÷Ÿ÷Úÿñ5ÓÑKû7 ÿ>п²0?óéÇü š_ü÷¼ÿ¾×ÿ‰®sTø#á­[V¼Ô§¾Õ–k¹Þy9c ر1“ŒŸZôª+ÐËç,¹ÊXGÈ彺Øè¡Ãп²‚W<«þ…è!¬ÿßè¿øÝð |+ÿA gþÿEÿÆëÕh¯Oûs1ÿŸ¬ÛÙC±å_ð |+ÿA gþÿEÿÆêÞ£ðGÃZÊO5ö¬®C ,`mŠ5O1õ ûç¥zU¿¶³ ó{Wqû8v<«þ…è!¬ÿßè¿øÝð |+ÿA gþÿEÿÆëÕh§ý¹˜ÿÏÖ/eÇ•Âð¯ý5Ÿûýÿ«w|5yma—Ú°K( •–<•2<™?»ë™Ó€+Ò¨¤ó¬Á´ÝW ýœ;Uÿ ¿ôÖïô_ünøP>ÿ †³ÿ¢ÿãuê´SþÜÌç벇cÊ¿á@øWþ‚Ïýþ‹ÿÖíŸÂÍËÃ@ŽëP6†óí»ÚDß¿fÌgf1éœ÷®âŠŠ™¾:¢Ju[¶¦”ÿu%8hÑÀ¡ðÿüþj÷ö?þ"øT>ÿŸÍOþþÇÿÄWEOö®3þ~3·ûCüìà?áPøþ5?ûûÿV-þhvÐ]Ä—Z[¨„NZDÈÕøù:åášíè¤óLcÞ£Çâ^ógÿ ‡Ãÿóù©ÿߨÿøŠ?áPøþ5?ûûÿ]ýÿµqŸóñûCüìà?áPøþ5?ûûÿWôO‡>ƒ«Á©ÚÜß<ÐîÚ²º9R§8PzÞ» *g™bçTm2eŽÄÊ.2›³ (¢¸NCÿÙhpcc-1.4.1/hpl/www/pfact.jpg0000644000000000000000000004575511256503657012561 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' 
",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀÈÊ"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú(¯'ø›ñ7ÄñnŸáÿé–73\Z}¦I.÷ç™B*ˆÙ™˜àNФX¢¼þŽô&h÷ùù"øH~9ÿЙ¡ÿßåÿäŠö +Åíü_ñªî{¸`ðކòZJ!œy€lr‹ Üsòºž=}sV?á!øçÿBf‡ÿ—ÿ’(Ø(¯ÿ„‡ãŸý šýþ_þHªö/øÕ©éÖ×ö~Ð䵺‰&…üÀ7#TàÜddÖ€=¢ŠñÿøH~9ÿЙ¡ÿßåÿ䊯{âÿZ| 5ׄt8ãic„0¼Ž± âã»2lóÅ{Eãÿðüsÿ¡3Cÿ¿ËÿÉÂCñÏþ„Íþÿ/ÿ$P°Q^qñ+âÕ§Š-<5?†t4Õîâ3Ao’w  IÜ'Ú>ãu=¾•±ÿ Ç?ú4?ûü¿ü‘@ÁExÿü$?ÿèLÐÿïòÿòEðüsÿ¡3Cÿ¿ËÿÉìWÿÂCñÏþ„Íþÿ/ÿ$Qÿ Ç?ú4?ûü¿ü‘@ÁEx~‰ãߌ^#ÑàÕ´Ÿ èw3îòåß³vÖ*xiÁ‚9¡ÿ Ç?ú4?ûü¿ü‘@ÁExÿü$?ÿèLÐÿïòÿòEðüsÿ¡3Cÿ¿ËÿÉìWÿÂCñÏþ„Íþÿ/ÿ$Ux¾$|EÑ|[áý3ÅÞÒ­-u{±l†Ëœ²©`D®ÒêpG=:€h¢Š(¯ñü„ÿì'þƒu^Á^?âù: ÿØ*Oýê€=þEOûÿôÝÿÜÿú+þ¹ÿªè(¬4ò|- ¼ÄÑW ÁKЩênÉTU ‡¿ä9âÏû Çÿ¤VµÐV>uöWÄ1}ž¾Í¨$[âM­.m`}Ή¾}¹þê¨í[W?àOù'žÿ°U¯þŠZè+?BÔÿ¶ü=¦jÞO“öëH®|­Û¶o@ÛsœgÀ  çüeÿ ;oû é¿ú[ tâ[¯±éPKöx'ݨYE²tÜ£}ÔI¸ï.íÊ{0µlQEãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+Ô|uöŠO‚ÿ³¼Ïí F¹ûgŸ/S·fÞÕuÜ>÷·=…QEQEyÿÁ/ù$:ý¼éD•èÏø'Äÿð˜øBÇ_ûØþÕæ~ãÍó6í‘“ï`g;sÓ½tQEWü_ÿ’‡ðÇþ§ÿFÛ×°Wü_ÿ’‡ðÇþ§ÿFÛаQEWø‡þN‡Âö “ÿAº¯`¯ñü„ÿì'þƒu@ÁEP£#øKUñ {?#ï¨!I0ÃìÃì° üœpAO—-6dÇÜV>}qwªø†äߦ À6± ¬Ç_™Øóë銯ÿ"§ý‹ÿúnÿîýÿ\ÿÕt‡à¸&µð/‡­î"’âÓ-’HäR¬Œ"PAAã¹Xþ¾¸Ôü¡ßÞIæ]]iöóLû@Üí–8 ’zPÅaø² ®t{t‚)%q©Ø9TRÄ*ÝÂÌxìO` ­ÊÇñ-õÆŸ¥A5¬ž\¨YBNÐr’]EŽ}U˜{gŽhbŠ( +×tJoÚ7äZ}Ûéð鎒ݬ,bFÛqÃ>0̼ÜzתW›ë>)Öm><ø{ÃP^lÒ.ô÷š{)÷ 9q‡Ü^‡·Ö½"€ (¢€ (¢€8„šgÂÝÎþÒ{K¨üýðÏ×3ÈFTò2?wÇü-Öõü8ÒumZãíÓùÞd»7m™Ôp À]…QEãÿÿä¡ü1ÿ°©ÿѶõìãÿÿä¡ü1ÿ°©ÿѶôìQEãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+Åü_e¡ûJøZÖg#})ò`áqrxt!‡NÇž(Ú(®þÝ/þ~µÏüÞÿñ깩hz­ÂÏq6¤Ž¨ ]JâÝq’yXÝA<õÆzz F±¸´Õ|C4ñìŽïPI ;Þ‚ÖÉã§ÌŒ9ôôÅlWá¼A¯IöÍV_°j ºOªÜÊŠiQä*ß4Ž~`qž: jOá=:æâYÞçY#—aµxŠ 9áVP{í@ÿÈ©ÿbÿþ›¿ûŸÿE×?õV<'cq¦x7C°¼ËºµÓíá™7µÖ5 288 ô¢ÇÃV}äwPÜj¯"g}Zêd9r!S׸ã¯Zñßj:ri>:Šu Ûkµ–;—[­F-Ý"I ’³ˆÄq‡ÃHãÝu§Fuq[k³õ輄Ýz¬ØÜjTÚÇæHº…”Än ÔR9çÑU¾8湟^iMo¤Amâ)n“]K”˵ñk`|––f6årrs–ÀÆ01È^j:K^Í6£sj8§Õ#kˆÚïÊr¬÷%cU× ùIn\nð5’“³Ò÷Ò]/åý׿è칑í´VJøvÅt×°ê~KÉæ:¥É—8S&ð8û¡±íÍUÿ„7KÿŸ­sÿ·¿üz¸Ê9}gÂÚÍßÇŸx– 
=úE¦žðÏqæ Øågm'qûëÐwúפWë©qÇ_ x^-WYMm1Þ[uÕnAvÿHl³ù›Éʯ$ôt¯Dÿ„7KÿŸ­sÿ·¿üz€: +ëÃVžG›qª¯“¾N­uTt-²A¹¹å›,{“QÁá=:Úâ)ÒçY/‡Q&µxêH9åZR{Aï@”V=÷†¬5 É.¦¸ÕRGÆDµÔ(0á@£§aÏ^µÓ­®".u’ñ¸ukWޤƒžU¥!‡±ô—ð·DÔ|9ðãIÒuk³ßAçy‘oWÛºgaÊ’ våÿ t„ñÃ'VÕµ-râú;Ì—ûjí7m™Ôp²€8p+¼Ót =*á§·›Rwd(EÖ¥qp¸È<,ŽÀ:ã=}MjQ\ÿü!º_üýkŸø=½ÿãÕb XAgsj—©ŽãnòúµÓ¸Úr6¹²{í#=ElWü_ÿ’‡ðÇþ§ÿFÛנ¥ÿÏÖ¹ÿƒÛßþ=^oñFÊ-?Æÿ mayÞ4Õ[yÞg9šÜòîK½Ï:P´QEWx®êÞËö™ð­ÅÝÄVð&”û¤•Â*ån@É› •yû@\¨Ÿ˜¯jNöШòó.m_ÿ„§ÃßôÒÿð2?ñ£þŸÐ{KÿÀÈÿƼÿþ'Á¯ïéø&“ÿVlj5¿†žÔc°×-t»K©"*e™2„R2:©ü«?ÞùŸì?ßü }7Xðå•þ±pq  ˜Î$ã'¯ðÞ·ðÓÅÚŒ–®—wuFfOì³ ËÆVyOƒ|OðÆÆ×BmZãUkØL­wö»u–#BŠw€¥š=Àì$6âpk»V¤#Q6µOGÜRXoð:/ kðè׎O_ˤ»àž d$i:¬¶.ABÛ¶àòþm©jÞ†ïƺÒÁÉçû|aa‘¢º’U“pB9ÚŠ¹ Ç–{€9ßÞx'VøgðÜzì×Z~|Ù¤¶P#ó&\}®@¸9„{IÇCU$Öü±£Ik}­ÜYÇ-»º_EØvÝLs(1î#+…\°;cô¥‰œc=cfŸEÞ¦Úyü´ò¶M`ús~Òÿð”ø{þƒÚ_þGþ4ÂSáïúiøø×'ˆ¾Ë᩼D–úYÒ¡¸û3Üe$À;vù{º0çæ²ÿáb|þþ—ÿ‚i?øÕ|ÿï|ÿØ¿øŽŸáëÿŠO?á/ÒãþÏ´ko±ù±Ÿ3"Q»~þ?ÖôÚ~ᅦü%>ÿ ö—ÿ‘ÿxŸŠµo i?|-¨Ê¶Ðx^ãGN‹j|©‰ö;D$“³ªä`tÅuŸð±> KÿÁ4ŸüjÞùû÷ÿÐ?á)ð÷ý´¿ü ühÿ„§ÃßôÒÿð2?ñ®?UñG½ì_Ú1ipýºÒ;Ûø•3o…󵸌ã8<j¯aão„zž£maf4¹.®¥HaO쇈 2bÀÉ#­½òöïþqÿ O‡¿è=¥ÿàdãGü%>ÿ ö—ÿ‘ÿqúߊ>øsXŸIÕ¢Òíï ÛæEý”Ï·r†¬d<4O|+ñ±“¤Å¥Ü_O»Ë‹û)“vÕ,yhÀy4~÷È?Ø¿ø x;Â:ü%Z]çÙ|ÏßùñÇ»tŒÿwyÆ7c¯jè?á)ð÷ý´¿ü ükÅ~øÏáž“à2ËÄ `5H¼ß?ÍÓ^VæW+–~R½ëм7­ü4ñv£%†‡k¥ÝÝG™“û,Ç„rñÕ‡çO÷žB_R¶¼ßÔÂSáïúiøøÑÿ O‡¿è=¥ÿàdã^ÿ à×÷ô¿üIÿÆ«BÓÅ ï´}GV¶‹K{7Êû\¿ÙL<¿1¶§<œ‘ŽÇz_½òû÷ÿ°ÿ„§ÃßôÒÿð2?ñ¯*ø¥ªéÚŸÄ?†¿`¿µ»òõ_ŸÈ™dÛ™mñœ3ƒùV¯ü,Oƒ_ßÒÿðM'ÿ®kÆÓøwPñ_ÂÝ[ÃPZ­ÞªØ– o'ÌÙ< È*0äS´¾¶"¯Õy_³æ¿¬{ÝQZ^?âù: ÿØ*Oý꽂¼Ä?òt>ÿ°TŸú Õ{Q@zUœ6ÚŽ¹4WqÎ÷W«,±®3 xScryÚŠüã‡w:•Ïø{þCž,ÿ°¬úEk]äž‚þÂïÃö¯ˆ®¬-î¯ K×»fµ–‚1»vcÉ|f5Ý…9Ã=zÝxÂÝ8-¼!x5 ÙeºŸÊ†ñÚrdµˆ N ‹ŒºôPF+ÓËù9*ó+ÿÒ_š"} ‰Rê­ð‡EûE—‰£Ýçý§í—lþV.gÚsߟàÎÜqÕµi©¨jžƒP‹Å–Û¢šiµæxHîätŽu1(gr¨ªIR7ǀعïˆÚ vŸt º†/Ø~Ñ·ì·Žþvû„ÏÙò~|zsй'†¢²½ðUÂêž”ÚÜZ KKùY3¨1Ì*NFþIèCú õçìyjúJÚ>õ|ý?¤ïš¾Ÿ×cÙ`‹þÛˆ­ÖH×E™Äp#œ}ŽBp±©éå1áWª1U\«*ǹQÏ7VòÛÜEÐJ…$ŽE ®¤`‚Æ+YæðîÆ¸–IôU@¯<¬ZKÿ°TŸú Õ{QEQErÿô{=Àšn™a«A«ZÃæì½ƒ%Ì®ÇfGSÒºŠóÿ‚_òHt/ûxÿÒ‰+Ð(¢Š(¯ø¿ÿ%áý…Oþ·¯`¯ø¿ÿ%áý…Oþ· `¢Š(¯ø•¬^h?¼=©Øi3ê×PéGe”ß.MœaXð =J÷ŠñÿÿÉÐøOþÁRè7TÂßñ‡ýmsó›ÿŒWIã_ëÞÖa²Òü©k°=ºÊ×6¥ö£a°í†@Pzÿâ»Ê(ÊüñUÖ¼]6Ÿ'ÃÝKJKˆ{ûÙe‘–)VÝvï  J$@ ¼9äÕ¾)ø«NÖo¬­þë7[ÜIw1™v̪Ä„Œ3Ôõêk´ð÷ü‡0= 
)TF¬ŸäüŸèD¬r~#ñmïŠ>È·Ðtkk\y4MÜnœnû‘´¯ï0Í×·J!ø¥<·š¸øo¤¸Bl¶¶Ï ²:ÎÍ€%WÌ·0qüD Ÿçßð‡Ãñ2Ñ¥ßö¯ø÷Óü¿; þ§÷+åcø¾æïöªßÚwê>_ímmóÚŸ.ÛKò¤—þ&ÌMöuأɒ­ÁÝ–õ'„©ËSÞz)tzÙÔþï“í¿“´)-?®Ç¤CñÄ2ø"ã]ojià·],™<ÙjŸ4~ë8É#îöëX?ð·üaÿD›\üæÿãìWÍ›âéž ñfƒ©ÝÝë £­ä­s$GJ‰–C$+€C"ƒ)ÚÈ<°Ý«çü-ÿÑ&×?9¿øÅ!ÿ“¡ðŸý‚¤ÿÐn«Ð?äTÿ±ÿMßýÏÿ¢¿ëŸú __øâM#û/ì5]Gíš|Wsy&Oôi;¡lD~eÀÎpyè*ž“ñOÅZ޳ceqðÇY³‚ââ8¤¹Ë¶f¹Ì `ž£§Q^©Ey¿Š~#ø“AñÞ™aðóUÕ­aÙ²ö&ÉrŠÇ‰‡‘Ôô£ÂßüI¯xŽÓL¿øyªé6³oß{9“dXFaœÄ£’ê:פQ@áïŠ7ºn‰ Ÿ„¾k3h1¼‚ÎDšW ¥Ø“Ÿ-ùÜNFæÁÈÏÜx+Çz÷Š5™¬µOêZ nÒ­ÍÑ}®Á”l£Q’ž¿Âx¨þ É!пíãÿJ$¯@ ÿ…¿ãú$Úçç7ÿ­;â?‰/|9­js|<Õm®´ÿ#ìöNdßwæ9VÛ˜ùÉÀ={W¤Q@?ÿ ÆôIµÏÎoþ1Xþ6Ö/5íá6§¤Ï¤ÝMªÉ¾Ê|ï‹(ÎUO Ðu¯x¯ø¿ÿ%áý…Oþ· `¢Š(¯ñü„ÿì'þƒu^Á^?âù: ÿØ*Oýê€=‚Š(  ½*òG\†+Hà{[ÕŠY¼/½¸íuNsÂ{ Jçü=ÿ!ÏØV?ý"µ®‚€ òOßhšß‡ï,µ=>¹º¼–6#1³AhPƒ FdólW­×•ü:‚k];Âö÷xÚâó’HïT­²0·Œàò!ˆ‡÷·W¡vOGÛ³îŸágæDºoĦÒÂìúý•Î|ÿ³yz"Áö¿ô„ß³ >ϳ¾1¿ç5µiáû-SÂ2®·¦j®íVö£ÃÑÛ™÷ÝÈ‚T“gî|¶bÛF2b'‚ùªäØÈåÿ-?ãÿþ¾þ??öŸáZZÞð·üÿñùeÿ!/ø÷ÿÿùyÿoû¾ÞUz“šå«¾Òþ^õ?»ù[ÊÞï,vþ»ÍEWÍ›«âM6Ûã.‡áù|=i>¡udÒÅ«1_6c±~Bqò7F|ñëÞW•ëºN¥7íáR->íôøtÇInÖ1#m¸áŸæ^ î=kÕ(÷ií•¥Lá,å ”Øà@AûŠOú¾vó希>å6HÒXÚ9]eaAê¬9$ HÒHìú’ÌÌrtòz’ç‡þŠÿ®ê€7¨¢Šåþëz÷4ÝNÃIƒIµ›ÍÙe6E‰]N0ª9 žƒ­uÃü °¼Ó>èÖwö“Ú]Gçï†xÌn¹žB2§‘Aük¸ Š( ¼âÿü”?†?ö?ú6Þ½‚¼âÿü”?†?ö?ú6Þ€=‚Š( ¼Ä?òt>ÿ°TŸú Õ{xÿˆäè|'ÿ`©?ôªö (¢€2ô­JkíG\·•c az¶ñ%M¼2å¹ëºFc€>§R°ô(&‡Xñ;ˈ“jhñ3)×ì–ë•õ•†GpGjÜ ¼[À)6©ªøw\¸ðî­o=Ýýýì“—?gFšÞ"dÁ‹>[žnàƒó={My_ï ͧiÞ{Ic<sÉu&¢KBÏo™LDç2·f>M¹ã5é`*ªpªŸX¾©t}䝸ú=œI^ÇŸ|CÒ¾Ïð‡ÃñOë6ßeûWü|IŸ²n¸Oõߺ\ïþ¹×ø«_L·›AÖü}…uËgŒGhf½´VË5ä±²È+—+)*w/ßN>i¾(èdøCaÿoØ~ÅæÌOÍþÍßpžÿ¾ó?¹­[¯ M¾–‡’Y½¥Í¦nV3›ÛÙv|ÀT–-ü"CýÊõ§‹‹]wRûK««ÿO5ßÏçt嚎ß×o#Øè¢ŠùssƒÕ|k©XüeÐü‡O¿²k‰ddo40)ÝŒ~ízƒÔþåy^»¤êS~ѾÕ"ÓîßO‡Lt–íac6ÛŽñ€~eàžãÖ½R€ (¢€9ÿù?ì_ÿÓwÿsÿè¯úçþ« ¢°cü5"Ç3ènBª¨ÉÓÉèÿžú+þ¹ÿª«ðïÄ—ž.ð&›®_ÇwW^nôHA¶WA€I=w®¢¸„šgÂÝÎþÒ{K¨üýðÏ×3ÈFTò2?wQEWü_ÿ’‡ðÇþ§ÿFÛ×°Wü_ÿ’‡ðÇþ§ÿFÛаQEWø‡þN‡Âö “ÿAº¯`¯ñü„ÿì'þƒu@ÁEP>}qwªø†äߦ À6± ¬Ç_™ØóëéŠØ¬½+MšÇQ×.%hÊ_Þ­ÄAIÈQo Xn:îà Ô ¼ƒáõÝ–¤ÞÔ'Ñ,†¥sqw<—‹¬£¿žöñ™eƒËIœÀýÞ3šõúñoÚͤj¾Ò.5M iìïïídKk®Í¼JUd0)ܤeŸpÜŸž]áVßÊÿ›ùeÛõÓ¾—"} /‰K¥„:/Ùô +lyÿfòõµŸìŸé ¿fý£|gfyÆ+Z;½7NÔ™áÝ>Æé¤‚n"ñw-o—¯ˆ±î>pei>`ß0ò qÍ|CŸÂ 
ÿÄËF—Ú¿ãßOòüì\'úŸÜ¯•âû›¿Ú­}2Úmc[ðM¬χ$|GrR0Äò¬W’ÈÆ&ë°mŒŒe2Cqóe½™ÒŸ-mö—Zêùùuùêäfžßð<~¢Š+äÍÏ7Ö|S¬Ú|yð÷† ¼Ù¤]éï4öþRîrâ7¸½o­zEpz¯‚µ+Œ"žÐiöMo,líæ–"a•qÞ/R:Ǽ Š( ›$i,mˆ®Ž ²°È õS¨ 7ø[â}RëÀZ=çˆgûDW)"Ǩ7T)# IÏ© ‘!áÊØm¦OH®_á߆ï<#àM7C¿’ .­|Ýï‡t®ã€z0íV?äTÿ±ÿMßýÏÿ¢¿ëŸú ‚Š( ¼âÿü”?†?ö?ú6Þ½‚¼âÿü”?†?ö?ú6Þ€=‚Š( ¼Ä?òt>ÿ°TŸú Õ{xÿˆäè|'ÿ`©?ôªö (¢€9ÿÈsÅŸöÿH­k ¬}×ìú¯ˆeûDý§PIvDûš,ZÀ›\ |›±ýÖSÞ¶(¯'øg®yÚ7„­?á2ûNå’°flßåÛF|3yYÝ¿ø÷cµzÅyGƒ5‹;SÐü0Þ!½»ŽÖö÷L(Úd1$«mxRÁË*¯PÜ–ÎÍzNq¨Ò½“üŸ÷eúz­ÔIÚÇ=ñG\û_Âø¬¾Ýöß3þažWö–Ë„öýÏ—ønÅij>#ûkxZßþïíµ^Y?Ù±üŸ¶âÿ÷ã÷{vcü¬ÿsÿ¼Ws¨|!Ñwëw·Úž™æiÐÇö¯*á1¿kŸ+n8Ù»w|V´~+»×õiÒxªå’öH..ôØ ŽàÅzíó:¹(çÉ ª †!2FãRxY(Õ÷VŠ]zŸôëËû»n¬”#›oë·Ÿõùû…Q_6lxÿˆäè|'ÿ`©?ôªö ãõý¿â†“ãOí/û>Ñ­¾ÇägÌÈ”nß»õ½6Ÿ»ïÇa@Q@Q@ðKþI…ÿoúQ%zsþ ðÇü!Þ±Ð>ÙöϲùŸ¿ò¼½Û¤gû¹8Æìuí]sÿò*Ø¿ÿ¦ïþçÿÑ_õÏýWAEaÀ‘ønâ+_7f“pâ;deùmd' Ý£láÀVùÃFй^?ñþJÃû Ÿýo^Á^?ñþJÃû Ÿýo@ÁEP^?âù: ÿØ*Oý꽂¼Ä?òt>ÿ°TŸú Õ{Q@ÿ‡¿ä9âÏû Çÿ¤VµÐV^•Šj:ãZM$“½êµâ°â9~Ï ¼<±ïË{ J+Éþèšô:7„®çÔu—³U’fŠVO(Å%´b%8œŠA)ò“’r©ÔúÅy'aðä7~FÑíº]^ KæÖ•åaäF Æ‚^EÛº= ÇŒ3^†›–§/gÑ>»_…ß•ìœK¡…ñGD×­~Ø}¯QÖeû™öß´2;}Ây~v'lã»|Ï}•¥¨øwĶmák‹ÝW_šÚÚòÉ.£¹hÌr¹¿Ê™qrÇp ˜Â¿ ¼ŽBóÿ—Jt_³èVØóþÍåëk?Ù?Ò~Ì1ûFþøÎÌóŒVÕ¤“Tðˆ¸ðýžžDÖ×6þ!K‡Å܆8Ö0XÌ­ °h‘†FÂG©7W–®ÛK¤;Ôþþv¿•íhÓOë±í”QE|Ù±ãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ƒÕl¼(ÿt;»½Nî?%“-’©ò¤‹e˜ì#82û£^ò€ (¢€ (¢€<ÿà—ü’ þÞ?ô¢Jô åþÛèVžÓ`ðÕì÷ºBù¿gžpC¾erÙW£nOƺŠ*9à†êÞ[{ˆ£š P¤‘È¡•ÔŒAà‚8ÅIEa¤óxvP–Iô¨°c½v,ð'B³ÉUã÷¼ü¹2ca‘üïâÿü”?†?ö?ú6Þ½‚¼?âf™ý•ñáµ½¼ßñ/þÕÞØ¯ü{~ö È­Ÿõ}6®>^@;vª{…Q@xÿˆäè|'ÿ`©?ôªö ñÿÿÉÐøOþÁRè7TìQEsþÿç‹?ì+þ‘Z×AYzT–/¨ë‹i ‘Ηª·ŒÇ‰%û<$2òxòÌc·*xîu(¯ øWØüÿÙº2b{ÆóbÔ7L3kÌÎ9vèë´í©œŸu¯$ð.›¤¿Þé+áØlÕäö%ôÏt‘Éx]¦FS0\yŠr`Œšôòé¨Â­úÅÿé2ó_©[/Ä86|!ðßüKth¶}«þ=õ3ÉÍ©ýóy¹þ/¿·ýš·ömš›û'@‡dö£Ì¶Õ<Ù"ÿ‰ƒñý¡·©Ï'kà³r6ádø•¤Û[ü!ÑvIá“ö_?ËûìÏ»uÂgìÛ˜ùŸíïÎÞqŠÚ´Ñôk SÂ7£ÂsÇEI§j7?˜×rym´˜tYKÉùdà ögˆ-m^ª]ûÕþ÷ŸŸÏ[æ–ß×cÛ(¢Šù3sÇüCÿ'Cá?ûIÿ ÝW°WªÞøQ>2èv—zeÜž({&k;ÕcåG&ʰÞp$þ÷‡>åQEQEyÿÁ/ù$:ý¼éD•èËü;¸Ð®ü ¦Ïá«)ì´†ó~Ïä—LJá²K7VÜzž¿…uQEWü_ÿ’‡ðÇþ§ÿFÛ×°Wü_ÿ’‡ðÇþ§ÿFÛаQEW¨øíÿ4ŸhùÙömö?#>fD£výÜ­é´ýß~; ãõ}ƒâ†“à¿ìï3ûBÑ®~ÙçãËÀ”íÙ·Ÿõ]w½íÈaEP?áïùx³þ±ÿé­t£]}£Uñ _g‚/³j ø“kK›XsŸâoŸnºª;VÅã? 
üA¥Ocá(¬l–åšâ‘t•GGmvyĆ Œ¾Ò_¡TÆO³W”x2ûUÖµ=X–Gm{{{tÖÓÛ7ÙÖ9 ‘üÌyJIòÛnç µé`\*ó+üŸ“ýŸCø•â *óá‹ö{(þßçý›ËÒV/+eÂoÙ‰›ÉÎ9ÆýýöÕëh·×>¶¶Ótø¦»¹´’‹DHZ5+‰ØÄ2ò€ÙÉ9ˆZ?¢ÕWá‹ö‹ßI·ÏûOÛ-<ÜÜ&Ï´æC³ÁÙãkZ95« CÂ\Üx¢î2§‹UÓœ[³=ë y‰˜„‘C)L†å"=À´ãG–®½%Õ÷«ýßNßåš¾Ÿ×cÜ(¢ŠùssÇüCÿ'Cá?ûIÿ ÝW°W¨øëì4ŸÿgyŸÚsöÏ?^§nͼÿªë¸}ïn{ (¢Š(¢Šóÿ‚_òHt/ûxÿÒ‰+Ð+ŸðO‰ÿá1ð…Ž¿ö?±ý«ÌýÇ›æmÛ#'ÞÀÎvç§zè(¢Š(®?Æÿ„¯Ä>Õ¿´~Ëý‡wöŸ+Èßçüñ¶ÜîÕã8=}«°®?Æ:ÿ„SÄ>Ò³¾Õý¹wöo7ÏÙä|ñ®ìm;¿Ög=è°¢Š(¯ñü„ÿì'þƒu^Á\«ã]JÇã.‡àø ´:}ý“\K##y¡€˜áNìc÷kÔ§ðï(¢Šçü=ÿ!ÏØV?ý"µ®‚²ô­JkíG\·•c az¶ñ%M¼2å¹ëºFc€>§R€ ò†òð—üŽ_vOøùÿOøöïÓùåþÖêõŠòOêúÞµwáýJöÃÄQ-íÕäó3Ý9µUh#e; êI$F¥¾RÌÕè`bÜj[³ü™èa|Qÿ’Caÿ#—ü´ÿÿúøOøüÿÚ…ij?{Âßò?ÿÇå—ü„¿ãßþ?ÿåçý¿îûyUÏüJ—Uo„:/Ú,¼Mï?í?l»gò±p›>Ó˜ÆüÿvãŒn­«McÄf©áÛMñb¤­½Ãj7²I~mÜ‘=|¥Üû\$®3›Ô)rÕÛiu]êyùWD_oë±í”QE|Ù±ãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ƒÕ|k©XüeÐü‡O¿²k‰ddo40)ÝŒ~ízƒÔþåQEQEyÿÁ/ù$:ý¼éD•èËü;ñ%狼 ¦ë—ñÁÕ×›½ Rm•Ð`OE먠Š( ¼âÿü”?†?ö?ú6Þ½‚¸?ø×R𿊼¥ÙAi$Õé·¹i‘‹"ï‰r˜`ć¨=w”QEæúÏ…µ›¿>ñ,{ô‹M=ážãÍA±ÊÎÚNã÷× ïõ¯H¯+×umJÚ7Ã\Z…ÚióiŽòÚ,Ì"vÛqË&pOʼ‘ØzPªQE£XÜZj¾!šxöGw¨$ÐÀïAkdñÓæFúzb¶+Bžiµ¤²È馉³~ÉnØ_A¹˜àw$÷­Ê+Æ~øRM Çwy -ÝÕá–ÞÖyÄm2 S"•ùÇ@0GZöjò¿‡SÍu§x^ââ_M<¾sÉ$LZõØÛÆIòn æcé¶½ åÔå{ÅþL‰-Žâ6ƒ§Ân¡ Kö´mû-㿾á3ö|ŸŸÇžœâµ-¼ïªx1¬ïü1s-›ÂþU¦ í,ˆ—²Híc‡P»‰Ï9W ¥ø£ÿ$†ÃþDßùiÿõðŸñçÿµ?Õºžki|.ðKàHœÜÚ!m%ŠÜ•kí¬-±ÿ,Ê’w̽Yâ*òÕ÷·Rüêÿ›ûÈIiýv=ŽŠ(¯™6<ßYð¶³wñçÃÞ%‚Ï~‘i§¼3Üy¨69YÀIÜ~úôþµéåzî­©CûFøcK‹P»M>m1Þ[E™„NÛn9dÎ ùW’;JõJ(¢Š(¢Šãþ蚇>i:N­oö{è<ï2-êûwLì9RAàƒÁ®Â¸„÷šŸÂÝòþî{»©<ýóO!‘ÛÈXòp…wQEW›üG𶳯xËÀ—úeŸŸk¥êkÇóQ|¤ó!9ÃOÝ3Ò½"¼¯âž­©iþ:øwoe¨]ÛAw©”¹Ž™eó`paˆÁõ>´ê”QEÁê¾ Ô¯¾2è~0Š{A§ØY5¼±³·šX‰†TmÆ?x½Hèò¼¯]Õµ(hß iqji§Í¦;Ëh³0‰ÛmÇ,™Á?*òGaé@©EP>cqiªø†iãÙÞ “@w½¬“ÇO™sééŠØ®Ãßòñgý…cÿÒ+Zè(¯ð ¬ÚF«áÝ"ãTЦžÎþþÖD¶°*ìÑÛÄ¥VCÊFY÷ À¹ñí5㟠®¬.áð¬òx’ÚîþYn]âmVYf6ÑÍ·;×;š\æLà“Šôòæ”*Ý}—Òÿf^N߇¯Uèq¿çßð‡Ãñ2Ñ¥ßö¯ø÷Óü¿; þ§÷+åcø¾æïö«NÊ;OXðE½®¥ ]IºÌVÚh†I;ÙÚ'0&ÅUF%w.J¶-óGñ)´£ð‡Eû>¿esŸ?ìÞ^ˆ°}¯ý!7ì³ìïŒoÇ9Í^¸}Üø8[x›O»˜ÜÚyÅáä·k…þÐ9e(0€w £ØOñœû3”yk{¯i}—Þ¯÷4ü6égËšéývó=âŠ(¯“78=WÁZ•÷Æ]ÆOh4û &·–6vóK0ʸÇï©ãÞW•뺶¥íá.-Bí4ù´Çymf;m¸å“8'å^Hì=+Õ(¢Š(¢Š(—øwá»ÏøMÐïä‚K«_7{ÀÄ¡Ý+¸Á Œ;WQ\? 
ûÍOányw=ÝÔž~ù§Èí‰ä,y8»Š(¢Š+ƒñß‚µ/x«ÁÚ¥”ö‘Á¢Þ›‹•™Ø3®ø› … œFz‘ÔWy^WñOVÔ´ÿ|;·²Ô.í »ÔÊ\Ç Ì‹2ù° 8 0Ä`úŸZõJ(¢€ àõ_ é·?t?KâH5 [&Š-%‚ù³© ëóƒº)û‡ŸNò¼Ä?òt>ÿ°TŸú Õ{Q@zUœ6ÚŽ¹4WqÎ÷W«,±®3 xScryÚŠüã‡w:•Ïø{þCž,ÿ°¬úEk]æ~ ³‡OþÀ²·»ñ}äó\E̸ûʰFœ>Î@Ì<žKrkÓ+¾é^T~¸ÿ„Y‡l÷’ý¦Y3﵄y„yCä|aî?3t– P«wö_nÏ»DO ÏŠ?òHl?ärÿ–Ÿñÿÿ_ ÿŸûOð­Õ³†úóÃ0Ïwã»TY!”I«cìÅ’éRç$~ñ™B'ûÐðzâ•ö„>ÿŠY¶û/Ú¿ãâLý“u®ýÒçðýοÅVÿ±~ͨøoøEõûO"{UóîeÌvùÔí”y –;²9^x?ÅëNŒyjûÛ)vïWûÞ^{ú_4öþ»EQEòæçªøoM¹øË¡ø‚_ÚA¨ZÙ4Qi,ÍH˜o_œ|íÑOÜ<úw•ãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø(¢Š(¢Š(—øw£Ùè>ÓtË Z ZÖ7eìÙ.ev8Ã0à’:ž•ÔWŸüÿ’C¡ÛÇþ”I^@Q@p~;ðÞ›­ø«Á×·¾!´Ó'Ó¯L¶ÖÓÝzÛâ;.§9@8 ÷ǽåxÿÅÿù( ì*ôm½{Q@pz­—…ã.‡ww©ÝÇℲe³²U>T‘bl³„gOâtqëÞWø‡þN‡Âö “ÿAº `¢Š(/JŽÅ5q­&’IÞõZñXq¿g„^XŒ÷å=†¥sþÿç‹?ì+þ‘Z×A@yŸ‚|?cgý>—áIOŠk‰muI5=Ìbx#T¸1ñ“2€6`lÆp3^™^ð® ±ø=ÿ³tdÄ÷æÅ¨n˜fÖ˜'œríÑ×iÚ;S9>–7 ¶v÷_~Ï´£úú=œK ÏŠ:Ù>ØÅö±yŸóó³wÜ'¿ï¼ÏÇnku|/coyᙵɤAk$!oWóœ¿jfŠ-œùädËc1ämÈá~!Á³á†ÿâ[£E³í_ñï¨yžNnýOï›ÍÏñ}ý¿ìÕ¿³lÔ| ßÙ:;'µe¶©æÉüLˆ—í ½Ny;_›‘· ëNå«ïô—WÞ¯ý<ÿ=úݹæžß×o#èª(¢¾\Üàõ[/ ?Æ]îïS»Å dËgdª|©"ÄÙf;Î ŸÄ>èã×¼¯ñü„ÿì'þƒu^Á@Q@Q@¿Ã»} ÓÀšl½ž÷H_7ìóÎwÌ®[ ªômàéø×Q^ðKþI…ÿoúQ%zQEÁøîË—>*ðt¾ Ôîí5oKiQB¤¬òï‹å|#`nŽ«ÔóéÞWü_ÿ’‡ðÇþ§ÿFÛаQEU946mRR]>ÑõSdWm ™Qyá_ænî}jåqú޾ÁñCIð_öw™ý¡h×?lóñåàJvìÛÏú®»‡Þöä°¢Š(/J¼†çQ×!ŠÒ8Öõb–EÆgco ïn;]SœðƒžÃR¹ÿÈsÅŸöÿH­k  ¼kÁ•ž¡®è7:uÖÆ¥¨½½œZ`ŠTŒÛÆV0D + #y †ŸË^OðÏ\ó´o ZÂeöË$`þÌٿ˶Œùf8ò³»ñîÇjô° ¨U²¿ºûö}£/ÓÕn¢] âûþøoþ&Z4»þÕÿú—çbá?Ôþå|¬ÜÝþÕk隬6šß‚^{¿j Ž!o˜"wg¼•H˜Û †ÆS&6þöLßuϵü!°ÿŠËíßmó?æåil¸OoÜù†ìV–£â?¶·…­ÿá>þÑûUå“ý—ûÉûn/ñ¿~?w·f1ßÊÏñW­9Ë–¯¹Ò]z¿ôïü¶éf¡š[]¼Ïf¢Š+åÍÎ?Q×ü7oñCIÐît7Ä76-­ÿÙ£o*0%Êù„ï^ø7¹®Â¼Ä?òt>ÿ°TŸú Õ{QEQEsþ Õt}oÂ:Žaö 2o3ɶòR-˜‘•¾T%FX1ãÖº óÿ‚_òHt/ûxÿÒ‰+Ð(¢Š(ªwšN›¨\ZÜ^éö—3Ú>ûi&…]¡lƒ”$eNTAéW+ñ‡Ž¿áñ†4Ÿìïµn]ý›Íóöy(i> þÎó?´-çíž~<¼ NÝ›yÿU×pûÞÜöåzî“©MûFøcT‹O»}>1Ò[µ…ŒHÛn8gÆù—‚{ZõJ(¢Š(¢ŠçüâøL|!c¯ýìjó?qæù›vÈÉ÷°3¹éÞº áþX^iŸ tk;ûIí.£ó÷Ãÿ°TŸú Õ{Q@ú5õÅÞ«âg“|vš‚CÚÄ6°HG~gcϯ¦+b±ôk‹MWÄ3OÈîõš¸è-`Œž:|ÈßOLVÅä®ìµ&ðÖ¡>‰d5+›‹¹ä¼]eü÷·ŒË(„ZLàÆîñœ ׯ׉|9Ò®4«ß YÞi:5½Õ­åôS4Zˆ’Tqm± '`\C¨S´v¦r}<¹' 
·eõ·Ù—š¿ãéÑÄúß—Jt_³èVØóþÍåëk?Ù?Ò~Ì1ûFþøÎÌóŒV´wzn¨xB}3ú}ÓIÜEâîZÞ)/^9cÜ|àÊÒ|À¾aäãšø‡Ï„>ÿ‰nϵǾ¡æy9¸Oõ?¾o7?Å÷öÿ³ZvZEÅî±àˆítMd‰¡”µ¶ªH’;Ù$)MÃy‹µX“‡ä°…ögòÖ÷žÒûO½_ïëøïÖï›5Óúíä}EWÉ›žo¬ø§Y´øóáï Ay³H»ÓÞiíü¤;Ü,äÄnqzßZôŠó}gÂÚÍßÇŸx– =úE¦žðÏqæ Øågm'qûëÐwúפPEPEPð·[Ô|GðãIÕµk´_Oçy’ìTݶgQ€vÇü-Ñ5|8ÒtZßì÷ÐyÞd[Õöî™Ør¤ƒÁƒ]…QEãÿÿä¡ü1ÿ°©ÿѶõìãÿÿä¡ü1ÿ°©ÿѶôìQEãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ÇüCÿ'Cá?ûIÿ ÝP°QE‡¡O4ÚljÒYdt‡SD‰Y‰¿d·l/ ÜÌp;’{ÖåeéZlÖ:޹q+FRþõn" NB‹xbÃq×tlxÏ}¥å§šëNð½Åľšy|ç’H˜µë±·Œ’ äÜÌÇÓmz¥yGƒ/t­/SÐü;½”—6—·ºzܮޱ;I‡pþsfË`—èBã'ÐÀÆRK.òdK¡Ï|Qÿ’Caÿ"oü´ÿúøOøóÿÚŸjÝO5´¾x%ð$Nnm¶’ÅnJµöÖØÿ–eI;æa\çįiWŸt_³ØÙGöÿ?ìÞ^’±y[.~ÌLÞNqÎ7ïï¶¶­5m/Äz§„tí>ÃL‚æá¢¼†A¢¬#‚îIQÄÌbÈ…þP¬ c’7¾¤èÔå«î½ºyÔÿ'÷u§õØöÊ(¢¾lØò½wVÔ¡ý£|1¥Å¨]¦Ÿ6˜ï-¢ÌÂ'm·²gü«É‡¥z¥qú~ßñCIñ§ö—ýŸhÖßcò3ædJ7oÝÇúÞ›OÝ÷ã° Š( Š( áýæ§ð·F¼¿»žîêO?|ÓÈdvÄò–<œá]Åsþ ðÇü!Þ±Ð>ÙöϲùŸ¿ò¼½Û¤gû¹8Æìuí]QEãÿÿä¡ü1ÿ°©ÿѶõìãÿÿä¡ü1ÿ°©ÿѶôìQEãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ÅÓ™ÌwgŒm«×kKsàãs¨x¾HVæÐN—Ö.»hØÊÁ[pl##|G¢j m¬,õoOuì¥äÊwæd?é'8|vmÎ8Î+e¼Aᛩô%›Äž.‰-d†Wkù–XXÇre :©Ë¹ÀU+û¤W§,‹O~:©/z›}ënëÈËû7ÍËì¥ugð½¾ï'÷3Üè®Kþg„?è/ÿ’ÒÿñÂÌð‡ýÿòZ_þ"¾{ÛÒþe÷›ÿeã¿çÌÿðþGâù: ÿØ*Oý꽂¼‹QÔü!ñCIñ§ü%_ö}£[}û>SædJ7oÇëzm?wߎÃþg„?è/ÿ’Òÿñ{z_̾ðþËÇÏ™ÿà/ü޶Šä¿áfxCþ‚ÿù-/ÿGü,ÏÐ_ÿ%¥ÿâ(öô¿™}áý—ŽÿŸ3ÿÀ_ùmÉÂÌð‡ýÿòZ_þ"øYžÿ ¿þKKÿÄQíé2ûÃû/ÿ>gÿ€¿ò3> É!пíãÿJ$¯@¯0ð'мá?Xh‘x™oÒÛÌÅÊÙÊ÷HÏÓÝŽ½«¢ÿ…™áú ÿä´¿üE?mOù—Þ%–ãZº£/üÿ‘ÖÑ\—ü,ÏÐ_ÿ%¥ÿâ(ÿ…™áú ÿä´¿üE/oKù—Þ?ì¼wüùŸþÿÈëkÇþ/ÿÉCøcÿaSÿ£më³ÿ…™áú ÿä´¿üEy·ÄOhþ#ø‡ðãû*óíFª<ÏݺmÝ,ûÀg¡§´ä줾ò*à1T¢çR”’]Zi~G»QE¡Èãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ÇüCÿ'Cá?ûIÿ ÝP°QEÏø{þCž,ÿ°¬úEk]dé dÚž¼-RU™oÔ]—Æ_³@A^zl1ŽÜƒZÔWŒü/—U6>Yl¼L¶Û®I=Û5¸Œ[E±™|° DçË]ØSœé^Í^7ðÿDÓc¼ðÝö™{¢‹!w{qmÛ®„r[Ä äƒ2¨bò`ŠôòþNJ¼ßÊÿô—戟Cø•.ªßt_´Yxš=ÞÚ~ÙvÏåbá6}§1ùþ íÇÝW®'ÖšçÁÂçOñ|pµÍ¡ï¯áFþÐ8©‰C66à’¸>r¾#h1Ú|!Ð6ê¿aûFß²Þ;ùÛî?gÉùñüyéÎ+ZÏÚfŸ«x6æûPðíÄV­qǧ_»Í$y'–Ñ+: wÏË&:^¼ý-_I[GÞ¯Ÿ§ôóWÓúì{½Q_*nxÿˆäè|'ÿ`©?ôªö óÝ^o /Æý«mA¼HÖ m&B¿gX¶Ïß6s'cÔW¡PEPEPŸüÿ’C¡ÛÇþ”I^\ŸÃI4Y~ioáØnáÒ›äGvA”~õ÷nÁ#ïnÇ=1]eQEãÿÿä¡ü1ÿ°©ÿѶõìãÿÿä¡ü1ÿ°©ÿѶôìQEãþ!ÿ“¡ðŸý‚¤ÿÐn«Ø+ÇüCÿ'Cá?ûIÿ ÝP°QEÏø{þCž,ÿ°¬úEk]cèÓ[ɪø… µòd‹PDüÂÞsý–îü¥Wû™êMlP^3ð¿V¶–ÇÂ6ˆDŒ×”Ëq¹-¢$«•Ú%ç÷œ0ÀRq^Í^5àJÏP×t:ëFŠÆãRÔ^ÞÎ-0E*Fmã+"м†Ã 
ÏŒO.‚”*ߤ_þ“/'ú7±Ï|JÕ­®>è»#ðÈûWŸåýŽÊdÛ¶á3ömÊ<¿ö÷ãw8Í^¸×-.®| pøA^âæÑÐØéóÇ0P#÷ È/Êr‚O™Ž£8_çßð‡Ãñ2Ñ¥ßö¯ø÷Óü¿; þ§÷+åcø¾æïö«NÊþÝ5«Ýþ7hTAm§›y$s{"¤‘8·]еŠå7`A–ög‡-mŠ]ûÕþï——Ë[fžß×cè*(¢¾LÜñÿÿÉÐøOþÁRè7UìÇê:ÿ†íþ(i:Î‘æø†æÑ¥µ¿û4måF¹_0ëÂ?cæ÷5ØPEPEPŸüÿ’C¡ÛÇþ”I^\ÿ‚u][ð…Ž£ X}ƒL›Ìòm¼”‹f$eo• Q– xõ®‚€ (¢€ ñÿ‹ÿòPþÿØTÿèÛzö ñÿ‹ÿòPþÿØTÿèÛzö (¢€ ñÿÿÉÐøOþÁRè7Uìãþ!ÿ“¡ðŸý‚¤ÿÐn¨Ø(¢Šçü=ÿ!ÏØV?ý"µ®‚²t‹ç»Ôõè(Y߬ ȸ. ´e½Nd#>€Õ­@xÏÂöÒ¾Ãá¿e-Î뀶ˢ,nÒ}š-ñ‰vä2 &s&pIÅ{5y€u=cU›Ã—×:oˆ-⺹»–LÜ¿ØÑÞ6OÇþ¤’DK»å áš½,¼*ÿ…öìû§øYüìDú·Ä¦ÒÂìúý•Î|ÿ³yz"Áö¿ô„ß³ >ϳ¾1¿ç5záôSsàámâm>îcsiäC‡’Ý®û@å–@ ÂÜ6Œga?ÆsGâTº«|!Ñ~Ñeâh÷yÿiûeÛ?•‹„ÙöœÆ7çø3·culAªë¶:—„e›JñW•#à ÁÔïX#2^<¾SîR„•Çî¸;~oZq|µué.°ïWû¿•¼¾Í³]?àùÝEWË›ž?âù: ÿØ*Oý꽂¼÷WñUͧÆýÃ+a§½½åƒN×O 7BÎp¯œû±Æ;ŸZô*(¢Š(¢Šóÿ‚_òHt/ûxÿÒ‰+Ð+“øi®MâO‡Ú^­qkik-Ç›ºHÊD»euùT“»“ÏRk¬ Š( ¼âÿü”?†?ö?ú6Þ½‚¼âÿü”?†?ö?ú6Þ€=‚Š( ¼Ä?òt>ÿ°TŸú Õ{yߎþÍâïØøƒNñ%Þ‰¨ZÛ›2˹b •e*~w“GNrè”Wÿ ñ‡ýsò›ÿÑÿ ƒÆôVuÏÊoþ?@‰¡A4:ljÞX¤D›SG‰™H¿d·\¯¨Ü¬2;‚;Våxÿü*ÑY×?)¿øýð¨ÜÎÛÑ#þWݹH gÙÞþæÎ 9üys-­¾ï&,™’-Ç-µLØ<œu©mÿgýVÎ{yí¾ ÞÁ5²”‚H­Z%%‰ DÙPK7ûÇÔ×l³Y55ü×ïÕÍÿ?÷üþzóO&ǸÑ^?ÿ ƒÆôVuÏÊoþ?Gü*ÑY×?)¿øýxæ…ÍwIÔ¦ý£|1ªE§Ý¾Ÿ˜é-ÚÂÆ$m·3ãüËÁ=Ç­z¥xÿü*ÑY×?)¿øýð¨èÖwö“Ú]Gçï†xÌn¹žB2§‘Aük¸¯ÿ…Aãú+:çå7ÿ£þŒ?è¬ëŸ”ßü~€=‚ŠñÿøT0ÿ¢³®~Sñú?áPxÃþŠÎ¹ùMÿÇèØ+Çþ/ÿÉCøcÿaSÿ£mèÿ…Aãú+:çå7ÿ©4ÿ‚ú’ø—HÕµßjZÊi—â(g‰Ü`<´UÎ cŽõÊ(¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€ (¢€?ÿÙhpcc-1.4.1/hpl/www/references.html0000644000000000000000000002716011256503657013757 00000000000000 HPL References

HPL References

The list of references below contains some published material relevant to this work. This list is provided for illustrative purposes, and should be regarded as an initial starting point for the interested reader. It is by no means meant to be exhaustive.

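The references have been sorted into five categories and listed chronologically within each category. The five categories are Linpack Benchmark, Parallel LU Factorization, Recursive LU Factorization, Parallel Matrix Multiply, and Parallel Triangular Solve.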

Linpack Benchmark

  • LINPACK Users Guide, J. Dongarra, J. Bunch, C. Moler and G. W. Stewart, SIAM, Philadelphia, PA, 1979.
  • Performance of Various Computers Using Standard Linear Equations Software, J. Dongarra, Technical Report CS-89-85, University of Tennessee, 1989. (An updated version of this report can be found at http://www.netlib.org/benchmark/performance.ps).
  • Towards Peak Parallel LINPACK Performance on 400 Transputers, R. Bisseling and L. Loyens, Supercomputer, Vol. 45, pp. 20-27, 1991.
  • Massively Parallel LINPACK Benchmark on the Intel Touchstone DELTA and iPSC/860 Systems, R. van de Geijn, 1991 Annual Users Conference Proceedings. Intel Supercomputer Users Group, Dallas, TX, 1991.
  • The LINPACK Benchmark on the AP 1000, R. Brent, Frontiers, 1992, pp. 128-135, McLean, VA, 1992.
  • Implementation of BLAS Level 3 and LINPACK Benchmark on the AP1000, R. Brent and P. Strazdins, Fujitsu Scientific and Technical Journal, Vol. 5, No. 1, pp. 61-70, 1993.
  • LU Factorization and the LINPACK Benchmark on the Intel Paragon, D. Womble, D. Greenberg, S. Wheat and R. Riesen, Sandia Technical Report, 1994.
  • Massively Parallel Distributed Computing: World's First 281 Gigaflop Supercomputer, J. Bolen, A. Davis, B. Dazey, S. Gupta, G. Henry, D. Robboy, G. Schiffler, D. Scott, M. Stallcup, A. Taraghi, S. Wheat from Intel SSD, L. Fisk, G. Istrail, C. Jong, R. Riesen, L. Shuler, from Sandia National Laboratories, Proceedings of the Intel Supercomputer Users Group 1995.
  • High Performance Software on Intel Pentium Pro Processors or Micro-Ops to TeraFLOPS, B. Greer and G. Henry, Proceedings of the SuperComputing 1997 Conference, ACM SIGARCH - IEEE Computer Society Press - ISBN: 0-89791-985-8, San Jose, CA, 1997.

Parallel LU Factorization

  • Communication Complexity of the Gaussian Elimination Algorithm on Multiprocessors, Y. Saad, Linear Algebra and Its Applications, Vol. 77, pp. 315-340, 1986.
  • LU Factorization Algorithms on Distributed-Memory Multiprocessor Architectures, G. Geist and C. Romine, SIAM Journal on Scientific and Statistical Computing, Vol. 9, pp. 639-649, 1988.
  • Parallel LU Decomposition on a Transputer Network, R. Bisseling and J. van der Vorst, Lecture Notes in Computer Sciences, Springer-Verlag, Eds. G. van Zee and J. van der Vorst, Vol. 384, pp. 61-77, 1989.
  • The Distributed Solution of Linear Systems Using the Torus-Wrap Data Mapping, C. Ashcraft, ECA-TR-147, Boeing Computer Services, Seattle, WA, 1990.
  • Experiments with Multicomputer LU-Decomposition, E. van de Velde, Concurrency: Practice and Experience, Vol. 2, pp. 1-26, 1990.
  • A Taxonomy of Distributed Dense LU Factorization Methods, C. Ashcraft, ECA-TR-161, Boeing Computer Services, Seattle, WA, 1991.
  • The Torus-Wrap Mapping for Dense Matrix Calculations on Massively Parallel Computers, B. Hendrickson and D. Womble, SIAM Journal on Scientific and Statistical Computing, Vol. 15, pp. 1201-1226, 1994.
  • Scalability Issues in the Design of a Library for Dense Linear Algebra, J. Dongarra, R. van de Geijn and D. Walker, Journal of Parallel and Distributed Computing, Vol. 22, No. 3, pp. 523-537, 1994.
  • Matrix Factorization using Distributed Panels on the Fujitsu AP1000, P. Strazdins, Proceedings of the IEEE First International Conference on Algorithms And Architectures for Parallel Processing ICA3PP-95, Brisbane, 1995.
  • The Design and Implementation of the ScaLAPACK LU, QR, and Cholesky Factorization Routines, J. Choi, J. Dongarra, S. Ostrouchov, A. Petitet, D. Walker and R. C. Whaley, Scientific Programming, Vol. 5, pp. 173-184, 1996.

Recursive LU Factorization

  • Locality of Reference in LU Decomposition with partial pivoting, S. Toledo, SIAM Journal on Matrix Analysis and Applications, Vol. 18, No. 4, 1997.
  • Recursion Leads to Automatic Variable Blocking for Dense Linear-Algebra Algorithms, F. Gustavson, IBM Journal of Research and Development, Vol. 41, No. 6, pp. 737-755, 1997.

Parallel Matrix Multiply

  • Matrix Algorithms on a Hypercube I: Matrix Multiplication, G. Fox, S. Otto and A. Hey, Parallel Computing, Vol. 3, pp. 17-31, 1987.
  • Basic Matrix Subprograms for Distributed-Memory Systems, A. Elster, Proceedings of the Fifth Distributed-Memory Computing Conference, Eds. D. Walker and Q. Stout, IEEE Press, pp. 311-316, 1990.
  • The Parallelization of Level 2 and 3 BLAS Operations on Distributed-Memory Machines, M. Aboelaze, N. Chrisochoides and E. Houstis, CSD-TR-91-007, Purdue University, West Lafayette, IN, 1991.
  • The Multicomputer Toolbox Approach to Concurrent BLAS and LACS, R. Falgout, A. Skjellum, S. Smith and C. Still, Proceedings of the Scalable High Performance Computing Conference SHPCC-92, IEEE Computer Society Press, 1992.
  • A High Performance Matrix Multiplication Algorithm on a Distributed-Memory Parallel Computer, Using Overlapped Communication, R. Agarwal, F. Gustavson and M. Zubair, IBM Journal of Research and Development, Vol. 38, No. 6, pp. 673-681, 1994.
  • PUMMA: Parallel Universal Matrix Multiplication Algorithms on Distributed-Memory Concurrent Computers, J. Choi, J. Dongarra and D. Walker, Concurrency: Practice and Experience, Vol. 6, No. 7, pp. 543-570, 1994.
  • Matrix Multiplication on the Intel Touchstone DELTA, S. Huss-Lederman, E. Jacobson, A. Tsao and G. Zhang, Concurrency: Practice and Experience, Vol. 6, No. 7, pp. 571-594, 1994.
  • A Three-Dimensional Approach to Parallel Matrix Multiplication, R. Agarwal, S. Balle, F. Gustavson, M. Joshi and P. Palkar, IBM Journal of Research and Development, Vol. 39, No. 5, pp. 575-582, 1995.
  • A High Performance Parallel Strassen Implementation, B. Grayson and R. van de Geijn, Parallel Processing Letters, Vol. 6, No. 1, pp. 3-12, 1996.
  • Parallel Implementation of BLAS: General Techniques for Level 3 BLAS, A. Chtchelkanova, J. Gunnels, G. Morrow, J. Overfelt and R. van de Geijn, Concurrency: Practice and Experience, Vol. 9, No. 9, pp. 837-857, 1997.
  • A Poly-Algorithm for Parallel Dense Matrix Multiplication on Two-Dimensional Process Grid Topologies, J. Li, R. Falgout and A. Skjellum, Concurrency: Practice and Experience, Vol. 9, No. 5, pp. 345-389, 1997.
  • SUMMA: Scalable Universal Matrix Multiplication Algorithm, R. van de Geijn and J. Watts, Concurrency: Practice and Experience, Vol. 9, No. 4, pp. 255-274, 1997.

Parallel Triangular Solve

  • Parallel Solution of Triangular Systems on Distributed-Memory Multiprocessors, M. Heath and C. Romine, SIAM Journal on Scientific and Statistical Computing, Vol. 9, pp. 558-588, 1988.
  • A Parallel Triangular Solver for a Distributed-Memory Multiprocessor, G. Li and T. Coleman, SIAM Journal on Scientific and Statistical Computing, Vol. 9, No. 3, pp. 485-502, 1988.
  • A New Method for Solving Triangular Systems on Distributed-Memory Message-Passing Multiprocessors, G. Li and T. Coleman, SIAM Journal on Scientific and Statistical Computing, Vol. 10, No. 2, pp. 382-396, 1989.
  • Parallel Triangular System Solving on a Mesh Network of Transputers, R. Bisseling and J. van der Vorst, SIAM Journal on Scientific and Statistical Computing, Vol. 12, pp. 787-799, 1991.

[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/results.html0000644000000000000000000001720311256503657013334 00000000000000 HPL Results

HPL Performance Results

The performance achieved by this software package on a few machine configurations is shown below. These results are only provided for illustrative purposes. By the time you read this, those systems will have changed; they may not even exist anymore, and one surely cannot exactly reproduce the state these machines were in when those measurements were obtained. To obtain accurate figures on your system, it is absolutely necessary to download the software and run it there.


4 AMD Athlon K7 500 Mhz (256 Mb) - (2x) 100 Mbs Switched - 2 NICs per node (channel bonding)

OS Linux 6.2 RedHat (Kernel 2.2.14)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPIch 1.2.1
BLAS ATLAS (Version 3.0 beta)
Comments 09 / 00

Performance (Gflops) w.r.t Problem size on 4 nodes.
GRID 2000 5000 8000 10000
1 x 4 1.28 1.73 1.89 1.95
2 x 2 1.17 1.68 1.88 1.93
4 x 1 0.81 1.43 1.70 1.80


8 Duals Intel PIII 550 Mhz (512 Mb) - Myrinet

OS Linux 6.1 RedHat (Kernel 2.2.15)
C compiler gcc (egcs-2.91.66 egcs-1.1.2 release)
C flags -fomit-frame-pointer -O3 -funroll-loops
MPI MPI GM (Version 1.2.3)
BLAS ATLAS (Version 3.0 beta)
Comments UTK / ICL - Torc cluster - 09 / 00

Performance (Gflops) w.r.t Problem size on 8- and 16-processor grids.
GRID 2000 5000 8000 10000 15000 20000
2 x 4 1.76 2.32 2.51 2.58 2.72 2.73
4 x 4 2.27 3.94 4.46 4.68 5.00 5.16


Compaq 64 nodes (4 ev67 667 Mhz processors per node) AlphaServer SC

OS Tru64 Version 5
C compiler cc Version 6.1
C flags -arch host -tune host -std -O5
MPI -lmpi -lelan
BLAS CXML
Comments ORNL / CCS - falcon - 09 / 00

In the table below, each row corresponds to a given number of cpus (or processors) and nodes. The first row for example is denoted by 1 / 1, i.e., 1 cpu / 1 node. Rmax is given in Gflops, and the value of Nmax in fact corresponds to 351 Mb per cpu for all machine configurations.

CPUS / NODES GRID N 1/2 Nmax Rmax (Gflops) Parallel Efficiency
1 / 1 1 x 1 150 6625 1.136 1.000
4 / 1 2 x 2 800 13250 4.360 0.960
16 / 4 4 x 4 2300 26500 17.00 0.935
64 / 16 8 x 8 5700 53000 67.50 0.928
256 / 64 16 x 16 14000 106000 263.6 0.906

For Rmax shown in the table, the parallel efficiency per cpu has been computed using the performance achieved by HPL on 1 cpu. That is fair: since the CXML matrix multiply routine was achieving at best 1.24 Gflops for large matrix operands on one cpu, it would have been difficult for a sequential Linpack benchmark implementation to achieve much more than 1.136 Gflops on this same cpu. For constant load (as in the table, 351 Mb per cpu for Nmax), HPL scales almost linearly, as it should.

The authors acknowledge the use of the Oak Ridge National Laboratory Compaq computer, funded by the Department of Energy's Office of Science and Energy Efficiency programs.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/roll.jpg0000644000000000000000000023135011256503657012420 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀ`¹"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú˃Ä:mωoU³“Í9;&É£MT©7k´®öWêýïþߟóõ¡ÿàõ¿øõð†ü ÿŸ­ÿ­ÿÇ«Ãþßáßî[ÿà1ÿâhû‡¹oÿ€Çÿ‰®O­Kþ}³ßþÁ¡ÿAtþõþg¸Âð3þ~´?ü·ÿ« l´-?Ç~;µðÓÀúBgýœÁ9™brØrN~bÝøé^öÿÿrßÿÿ^¹û<Ém.¥âç´ ?cÛµvŽ’ç®kZUœÝœZõ8±ùe<-58WŒõµ¢îúê{¥Q[žIóÇÿ |)Ô< ¦Ýx–ãJM]üß´ õf…Æ%p¹A ÇÊ·=k§ÿ„7àgüýhø=oþ=\Çÿü)Óü ¦Úø–ßJ}]<ß´ô–™Îer¹qÏÊW¿+§ÿ„Ëàgüúèø"oþ3@ü!¿?çëCÿÁëñêÃ×´†úV³á ü6šúƒxŽÉ$º‘¸o+q'*]°7çžµ¹ÿ —ÀÏùõÐÿðDßüf°õíᾫ¬øB榠¾#²y ®šmÛÊÜAË\Åxϧ¥{åQ@7ã]3š¯ÆÈ`ñ„–‰§¯‡ã7WfÝ|ß´°`Ë“´·õô©?á øÿ?Zþ[ÿT~5Ôü)¥|l†Ghú{xqR1uhnÍûK… Ø;Cs_Z“þ/Ÿóë¡ÿà‰¿øÍð†ü ÿŸ­ÿ­ÿǨÿ„7àgüýhø=oþ=Gü&_?ç×CÿÁñš?á2øÿ>ºþ›ÿŒÐ§Áhlm´oÁ¥˜ÎŸˆî’ÔÇ&õ1Œ&'pÛŽrs^™^gðZkÄóéb1§Éâ;§µDZDEc)…ÀÚ6㌠W¦P_<|;ð×ÂCÀšm׉n4¤ÕßÍû@ŸVh\bW ” |¡{sÖ¾‡¯ž>ø—áNŸàM6×ĶúSêéæý Ï¤´Îs+•ˈÎ~R½øé@?ü!¿?çëCÿÁëñê?á øÿ?Zþ[ÿQÿ —ÀÏùõÐÿðDßüføL¾Ï®‡ÿ‚&ÿã4â |)ÓôQuá«)õt»µû8ƒViœæxÃa ‡?)nÜu¯x¯ñ?‰~ê(µðÕ¾”š»ÝÚýœÁ¤´.1 Mcs£xž},F4ù1³ÿ„gÄz¦ÿÞËçi–|k’£imà ÆqèG­uð»~ÐÃÿ’Wünø]¿?èaÿÉ+þ7Gü-?ú¼sÿ‚þÎøZõ!xçÿÿýqÿ¾)x7Ä5m'IÖ~Ñ}?“åÅöY“vÙ‘,€<š÷ ðÿŠ^?þÛøq«ißð‰x®ÃÎòÒoôß*ÄÈß3n8Î0=ȯp ¼¿âF·§xsâ?€umZãìö0hù’ìgÛºQ‚O$z…yÄWûâ?€u°__ù?Ú?èÖù³>aEùW#8ÎO°4¡ÿ ·áçý ?ù%qÿÆèÿ…Ûðóþ†ü’¸ÿãtÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÐÿ ·áçý ?ù%qÿÆë?á¾·§xâ?µm&ãí3ÿgyrìdݶSÃG ŽEhÂÓÿ© Ç?ø'ÿìë?ᾫý·ñÇÚØ/¬<ïìïôkø|©“ºüË“Œã#ØŠõ (¢€>aðGü;£ø>ÂÂÿQònbó7§‘#c21…#¡ÐÿÂÊðý¿òZ_þ&®ü-ñÿö'Ã'Nÿ„KÅwþOþ“a¦ù°¾fvù[pÎ3ƒî vð´ÿêBñÏþ ÿû:ð«pþ­IT”¥vÛÝuùŒ3:ЊŠKO_ó8/øY^ÿ ·þKKÿÄÖ‰> xvkêÒòK¯°k÷“G.¬cŒ’ØÜ'§ï^µÿ Oþ¤/ÿàŸÿ³®_Æ4ÿ„QðuŸü#>#Òöx–Ê_;S°ò#lCn9ns@}*°ù…XÕŒ¥uæ¿ÈUs*µ àÒ³õÿ28?i oElú-ÜK¸V¾’Pd@GÞ1(<ôbqÈüµ±¯ü|ð~›¥´ú<òk›Â­²Ç$ÙÓè 
Éc$v–þ𵞱g«Yh66wÖ{ü™m"ãzíl„Àn 08ÉÆ*ö¹ i>$Óņ³cå°‘eÈ::ž#‘Ü{‚Aà‘^Ùçž#£|`ðÖ¡ñMŸà¯tŸ ÜË ¾­4;fÜn]‘p1…Ÿ÷ElÂ×þ„Oÿà ÿñUããs e ¼”pîk½íú3HÂ-]»_ð»~ÐÃÿ’Wünø]¿?èaÿÉ+þ7Y¿ð°åÿ¡Æ¿ø(?üUð°åÿ¡Æ¿ø(?üUqÿkæ?ôÿð/þÔ¯gæ-|¾·Ôôï_ÙÉæZÝx–òh_i‘„eN# ޵éæ¿n æ•⛓öæo]ÉäÜ&É#ÊÆvºöaЎƽ*¾Š Ê)µfbáÿ ~)x7ß4'VÖ~Ï}æEöYŸné‡*„<÷ ðo†l<;ð÷KÒ¦ðÿˆ®äƒÍÌÖvK$MºWo•‹Œã8#øVÕ®>Ïcö™.Æ}»¡E($ò@àW¨W—üHÕ±>#øQûõÿ“ý£þa›3æ_•r3ŒäûTQ¡ÿ ·áçý ?ù%qÿÆèÿ…Ûðóþ†ü’¸ÿãtÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÐÿ ·áçý ?ù%qÿÆê¿ÁÛë}ONñeýœže­×‰o&…ö‘¹FTàò2ëV?áiÿÔ…ãŸüÿöu_àí×Û´ï^}ž{?Ä·’ù7 ²H÷Î×^Ì3‚;ôŠçüwÿ$óÄ¿ö ºÿÑM]sþ;ÿ’yâ_û]覠Âü ¦x7C°¼×|»«]>Þ“ì“®±¨a˜8 ô­ø]¿?èaÿÉ+þ7Xþø“öèvð…xÊãÈÓíâó­ô­ñɶ5‘·r§¸­øZõ!xçÿÿýð»~ÐÃÿ’Wün¹|D𯋵Xhz¯Úî£ñ-”ÌŸg–< , ˨X~uÔÂÓÿ© Ç?ø'ÿìë—ñ‡?á#Ô|gÿψô½ž%²—ÎÔì<ˆÛ†ÐÛŽ[œãÐJöŠ(¢€<ŸÄž)Ѽ#ñÖ;ýróì–²xhB¯å<™srH@OE?•lÂíøyÿCþI\ñºÇñ&¿ÿçÇXï?²5]S†„^N™mçȹ¹'q\Œ/Ï©µ±ÿ Oþ¤/ÿàŸÿ³ þoÃÏúòJãÿÑÿ ·áçý ?ù%qÿÆèÿ…§ÿRŽðOÿÙÑÿ Oþ¤/ÿàŸÿ³ à—ü’ þÞ?ô¢Jô óÿ‚_òHt/ûxÿÒ‰+Ð(¢Š(¢Š(¯ð?áÔ|cgÿψõMþ%½—ÎÓ,<ø×%FÒÛ†ŒãÐZöŠñëž*Ó5áø;ûjÕ¼Kzíqý§¶×Ê‚›\dàsïí@Gü-?ú¼sÿ‚þÎøZõ!xçÿÿýð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþÇüRñÿößÃ[Nÿ„KÅvw“þ“¦ùP¦&Fù›qÆqîE{…xÅ/xÊûáÆ­m«xû.ÅüŸ2óû^ü¼L„|Š2r@tÎ{W¸P^_ñ#UþÄøàGì×þOöú5„>lϘQ~UÈÎ3“ì z…yÄ‹½FÇâ?€nt/ûRù?´|»?´,fa@~và`yëŒw  øZõ!xçÿÿýð´ÿêBñÏþ ÿû:?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(ÿ…§ÿRŽðOÿÙÖÃ}Wûoâ?µ°_XyßÙßè×ðùS&!uù—'ÆG±¡ÿ oÄ?ú&ù_·ÿ Ïøow¨ß|Gñõέ¥ÿeß?öw™gö…ŸËÄ.μ€3ŽÔêQExÂßÿb|8ÒtïøD¼Wäùßé6o› ægo•· ã8>à×aÿ Oþ¤/ÿàŸÿ³®?áoˆ¼ecðãI¶Ò| ý©bžw—yý¯ ffr~F$Žzã=ë°ÿ„·âýü¯Ûÿ…ð´ÿêBñÏþ ÿû:åüaãOøHõYÿÂ3â=/g‰l¥óµ;"6Áa´6ã–ç8ôÒºøK~!ÿÑ0ÿÊý¿øW/ã sÅZž£àèuÏbÚ¯‰ln?´â¹Üù`j Œ‚N}½èÚ(¢ŠòkÿðŽ|uŽóû#UÕ7øhEäé–Þ|‹›’wÈÂñŒú‘ë[ð´ÿêBñÏþ ÿû:Çñ&£¬éŸc›Cж®›ÃAßí‰mµ>ÒI}Î0p@÷ö­øK~!ÿÑ0ÿÊý¿øPÿ Oþ¤/ÿàŸÿ³£þŸýH^9ÿÁ?ÿgGü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…Wø;uöí;Å—ŸgžßÏñ-ä¾MÂl’=Â3µ×³ àŽÆ½"¼ßàì×w‹&¼µû%Ôž%¼y­üÁ'”äFY7##®+Ò(¯—¼©ßÛøFÆ(|/â+Ø×ÌÅÅžžÒDÿ¼oºÀóއÜú†¼?áoˆ¼ecðãI¶Ò| ý©bžw—yý¯ ffr~F$Žzã=ë›…§‰‚…M¯s“‚¥Œ¦©ÕÙ;ÿ_yý³ªЕâßü7øÑý³ªЕâßü7ø× ÿÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øWö³ûÏ3ý\ÀöyäÞ'×/âÐešo xŠÖ8¥†C-å‹EÛ*6Îqœ`{‘]‰ûBÞjþ(‚ÓþIä±–&g°&æìÈmÊ0¡—•À#–ÝÆ+cÆž"ñ•÷†žÛVð'ö]‹ÝÚy—ŸÚðÏåââ2>E9 :g=«Õ#ÒtØuIµH´ûDÔ&M’ݬ*%uã†|d•x'°ô®ì.žö½ÏO‚¥ƒ¦éÒÙ»ÿ_qä~7øãy¢iÖßÙ¾Õl/¦—íëovͲXžØ'Ú¹ þ7Úk^<ðÆ·}¡\Z[hÿjÞ–ó‰Þ_6-ƒ„ý·?EêZN›¬Û­¾©§Ú_@®cº…ePØ# 
0#8$gÜ×üyÒ5w]ðv›¤Úý¦úo¶ùpùŠ›°±1刀O'µvÑŒ%R*£´[W}—Vu;ÛCCþ#¿ôñþEÿÇ(ÿ†ˆð¯ýëÃÙ‰ˆ¬ç}¹fÜCÛ´(ÇÞÎ}©Oˆ„\¥NI/&)a«ÅsJ /F}EW1äþ$×ÿáøëçöF«ªoðЋÉÓ-¼ù7$î+‘…ãõ#Ö¶?áiÿÔ…ãŸüÿöuâMGYÓ>:Ç6‡ ÿm]7†‚5¿ÛÛj}¤’ûœ`à€1ïí[ð–üCÿ¢aÿ•ûð þŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þÎøK~!ÿÑ0ÿÊý¿øQÿ oÄ?ú&ù_·ÿ > É!пíãÿJ$¯@¯?ø%ÿ$‡Bÿ·ý(’½€ (¢€ (¢€ ñÿÂyý£ãøEÿáûü$·»ÿ´üÿ3ÌÊç_q·ß9¯h¯/Ó¼7ñÃšŽ»ý‡'…ÇRÕn5ûs\Ì#ìãžsÍhÅßÿ©ÿ&èÿ‹¿ÿR7þMÑÿþ¤oü›®N‰~/ŸÄ§ÃÉ­x j‚á­|—†ý𤂛ÙBç#žNÎEGñKþ?ü+[ûþOìÏÜùß`ûGþ¹6íßòýí¹Ïl׸W“ø§Ãü]á˽þOÇku³{À×AÆ×W$ÕGjõŠ+Ëþ$lÂÇðöØ´ÿâcäý¿“þ¥7nÙó}ÝØÇ|W¨Wã ø“Sñ†õÏ I¥-Ö‘ö¬¦¤Òo5: ÉÀ Üvë@ÿâïÿÔÿ“tÅßÿ©ÿ&èÿ‹¿ÿR7þMÖ~³®|Kðöœ÷ú½ÿÃû+UÈß3ÝÄv¨êÍ€p£$ã@ñwÿêFÿÉºÏøoý±ÿ ÇßÛÿaþÓÿ‰wö þOú—Û·Í÷vç=óRi:—ÅgF±Õ-×ÁKí¼w¬‚è0WPÀ3ƒêkSÁñ&™â?kž%“Jk­_ì¸M5¤(¾R2tq‘W¹ïÒ€;Š(¢€ÿ„£þϰÿÂKe³û3Ïó<̶3æq·½óŠ4ˆ¾2×õÇÑt½oáýÍúïÄj·À>ß½±ˆ ýÏÊN@$p3[†þ#øQп·$ð¢X麭¾ ßak#ydä àƒÁ|¿3îÙžvç8Ï8¯H®?áç†õiÚÇöä–/}©j³j ösù2ðä9ã×a@xÂßøXÿð®4ŸìøE?³?}äý¿íwúçÝ»gË÷·c±^á^Oáo üSð‡-4; <%­®ý;];œä€V=¨cþ.ÿýHßù7Gü]ÿú‘¿ònø»ÿõ#äÝsv9ø‹©øÖó–2ø*{û+s=Ĉ.ŒHC*˜÷g%Áa œ‚ž4ÿ…ÿÓÿoÿÂ)ý™ö»O;ìhó¿ãâ=»wü¿{nsÛ5ìæ:¾‰ñCÄ6MÔßÁédóÃ$­lnD˜ŽE“åÜ꣨¯N ¼¿âGöÇü,ÿ`}‡ûOþ&>OÛ÷ù?êSvíŸ7ÝÝŒwÅz…pþ7ð߉5?øo\ðÔšRÝijÊjM FóQS¢ œÝÇn´_þ.ÿýHßù7Gü]ÿú‘¿ònø»ÿõ#äÝsþ'ñ·¼ö_íýCÀÖjßäþæþMÛq»î)Æ7/_Zè?âïÿÔÿ“u_àïÛ?³¼Yý£ä}»þ[Ï´}Ÿ>_™ˆ÷lÏ;sœgœU}[øâ= [I¸ð5ÅŒû¼¹v^¦í¬TðÀÈ#‘]ÃÏ ëÓµíÉ,^ûRÕfÔì,æ5ód àÈR­‚qÔßñ®§ñŸJÑ¡žÞ=Ý®Э&¸ŸXüË"° Ç'ÎßS^ÉEq§Î¯Äë‰ &«o¥/ˆD1n¾ÁI-ÆàÇÈloÝòöáyäö?ð–üPÿž>ÿ¾nÆ´üià¿j~;‡Ä^ŸFU]1l]5”ù¬ä€‹î½ýx¬ÏøD¾(ÏoÿßW?á]ý‡/¿¹ÕGêü¿¼½ÃþߊóÇÁÿ÷ÍÏøÑÿ oÅùãàÿûæçük—ñ&¯âoê1Økš×ƒí.¤ˆL©äßI”$€r‘‘ÕOå[&ãÿèðjÚM÷ƒî,gÝåË‹´ÝµŠžA‚9§û7™¯û™×|ÿ’C¡ÛÇþ”I^\¿Ã¿ ÞxGÀšn‡$]Zù»Þ%é]Æ ôaÚºŠã8Š( ¸;ëé´ë÷pÜIwpt{‹M`pËçÌŒTE ÊBç9¹”îãnÞò±ï´kû»É'‡ÄÚ­”mŒAv¥ ÂÍÏ^IëéÅq~%¾»ñoß-ý¥Þ”l¬žâ&¶’î)¹*Þt†B üÀ†çiÚkÓ+ÄïþøêëÆúާoã©-,ïL·P»ÅpꉈÃÇDb¿w;‡lv×6Þ>ñÕ¾©«XMâ©dk ù­‰clÄmØ1œgÓ52’Š»5£Fu§É Ϥ+çýD×l,ü/à‰<)}æ“âX¯îu8aÎâ,Æ_7›kç ò ?ð°|kÿC5ÏþZÿñª?á`ø×þ†kŸüµÿãUŸ·Ùý—ˆì¾óèê+æ]WâgŽ,tÙ®bñ,åÓ imŽHóËÞ¾š­#5%trWÃ΄¹g¸QEygÅïëÞÔ¼7m¢êMd—¿jóÊÖØ#+÷Õ±÷OZmÙ\Îs’ŠÝž§\ß´˜uŸëVï§Ç}:ÙNö±´"VyL Á;òpçž+Åá`ø×þ†kŸüµÿãTÂÁñ¯ý ×?ø kÿÆ«/o¿û/Ù}ç¹x. 
­| áë{ˆ¤†x´Ëd’9«#”AäxÅnWÎ?ð°|kÿC5ÏþZÿñªîþø¯^ñ¥âKmkRkÔ²û/ZЮñ!o¸«Ÿº:úUF¤dìŒkàªÐ4ö=NŠ(­@¢¼ßáÛxƒÅÞÓuËÿj±Ý]y»Ò {0ƒl®ƒÀOEë¨ÿ„{Tÿ¡Ï\ÿ¿6_ü@/†ç‹Qøhfðž·§Yi¦â #ÒMµ•º°c$îçk” *îÆ ÕëUÏÿÂ=ªÐç®ß›/þG®oųk¾¸ð̶þ(Ô®’ÿ]µ±ž+¨-v´N[p!Rˌ繠D¢Š(¢¸=JÿX¾ø¸¾·ÖîôýÒ5Kk+=í”7,pY è›rq“êkSþíSþ‡=sþüÙò=y¿ÂãKñmìšn®hÚ$ÚT"î-Z0¥¯U° l@f]¦CË…öŠçÿáÕ?ès×?ïÍ—ÿ#×7âÙµß \xf[jWI®ÚØÏÔ»Z'-¸ ©åÆsÜТQEQ\¥¬_|\_[ëwz~ž4!|VÖ(Œ¾yL“$oÆÓÓØ{çsþíSþ‡=sþüÙò=güF—ÄC±°ðãO Æ¥¨Ceqw +Ú@û·Ê0FÜ`|ÇÏÖA6¶ñ[ÛÅ0D#Ž5 ¨ `Æ+þíSþ‡=sþüÙò=ðjŸô9ëŸ÷æËÿ‘è ¢¸ÿ…ºÞ£â?‡N­«\}¢ú;Ì—b¦í³:Ž8p+° Š( Š( ¾DÖì>ÕãO¿Û.áÆµv6Ã.ÐxyÆ:××uá>øc¥xÓWñ†£}©êö²Åâ;ØYN¨„ ’ ~cß°¬«BS¢ìÎì»G_ÚW‡4m±åÿØçþ‚zýÿÿëQýŽè'¨ÿßÿþµ{‡ü(/ÐÄ¿øÿ£þ‡¿è?â_ü ÿ×'Õ«ÿ9ôÛY_ý?¼ðgMû>•4¿n½—nß’Yw)ù‡QŠûR¾ø‘ð‡Fð¿€u=fÓW×.'·ò¶Çurn•äðÄõëŠúº¨Bp¦îxYž+‰¬§‡‡"µ­çw¨W‡~ÐPý£Uð„^l‘nûoÏmaòÅÐ׸ו|RÐ-üQãïè×w6ð\hn’ÕÂH»bF$9P:tÍi4ÜZG&p§ZšºM6»«ìxwö9ÿ ž£ÿÿúÔcŸú ê?÷ÿÿ­^áÿ Ãßôñ/þGÿÆèÿ…áïúø—ÿ#ÿãuÃõjÿÎ}GöÖWÿ@Ïï…Óum7Y·k/P´¾\£Ik2Ê¡° RFpAǸ¨íuÝûQŸN³Õln/ Ýç[Cp${NÖÜ ä`zó üþÁÓ¤µÿ„ßÄqo”É2ãìqòå>|·{=01ÅaÏû7Ãu¬ËyqâÛ¹ –àË"ÉjwRÙ9”¾ ‘ü[zóŽÕÔxDŸ |ý·ðãIÔá-ñ]‡ç£Xj^T)‰~UÚqœdû“]‡ü*ÏúŸ|sÿƒþ¼§ÀzÝç‚ôùâñO‰,QüÌ[Ùê(“0ùWg>ä×GÿÍÿýþ0ÿÁ«…xõsÌ%)Êœ›ºvÛ±Ý º¼â¤­fvð«?ê}ñÏþ?û åüaà¿øG5^ÂMâ=S‰l¢òu;ÿ>5Éc¸.цãô'ÖªÂ3ÿC¿Œ?ðjßáYZžu§ëÞ–okÚ’ŸÙ¨‡P½3F§q;€#ïqŒúUC:Âר©Á»¿!TÀV§9l¢(¢ŠõŽ#ÉüI ÂGñÖ;?í}WKÙá¡/¦\ù6.HÚ[+Îqê¥l¬ÿ©÷Ç?ø8ÿì+”øc6¡ñž¡Õu-5‡‡•ŒÚ}Á†Fha´‘ü<ç UøFoÿèwñ‡þ [ü+ÌÅfØl-OgQ»útpUkGžŸü*ÏúŸ|sÿƒþÂøUŸõ>øçÿý…qŸðŒßÿÐïãü·øQÿÍÿýþ0ÿÁ«…sÿ¬.ïî5þÌÄv_yÕüµûâË?´Oqäx–ò/:á÷É&ÑÜíÝŽ2Os^‘^cðF&ƒAñ$O<Ó²x†éLÓ¾é$!c˜÷cÔŸZôêöc%(©.§Vva^ð·ÀÛ4GþߨyÞwú5†¥åB˜™×å]§ÆO¹5îáÿ |ý·ðãIÔá-ñ]‡ç£Xj^T)‰~UÚqœdû“T#°ÿ…YÿSïŽðqÿØQÿ ³þ§ßÿàãÿ°£þgýO¾9ÿÁÇÿaGü*ÏúŸ|sÿƒþ€9ÿxûÃO¨ÿÂ[â»ÿ&îÓýÿRóa|ÜF¿2íÆr=À¯`¯ñ§€?±<4úü%¾+¿òní?ѯõ/6ÍÄkó.Ñœg#Ü ö +Ëþ$i_Ûü§}¾úÃÎþÑÿI°›Ê™1 7ÊØ8Î0}‰¯P¯/ø‘¥müGðöûë;ûGý&Âo*dÄ(ß+`ã8Áö&€4?áVÔûãŸüö¬ÿ©÷Ç?ø8ÿì(ÿ…YÿSïŽðqÿØQÿ ³þ§ßÿàãÿ° þgýO¾9ÿÁÇÿaUþÚý‡NñeŸÚ'¸òÞ_&ßUÙ{£Sµo 3€; Øÿ…YÿSïŽðqÿØV?„þ}»Áºçü&¾2·óôûy|›}WdqîNÔ]¼(Îì+cþgýO¾9ÿÁÇÿa@ü*ÏúŸ|sÿƒþ¹x/þÍGÁןð“xTßâ[(¼NÿÏrXî ´a¸Æ} õ®£þgýO¾9ÿÁÇÿa\¿Œ<ÿæ£àëÏøI¼Gªoñ-”^N§çƹ,wÚ0Üc>„úдQEäþ$Ð?á#øëŸö¾«¥ìðЗÎÓ.|‰$m-ƒ•ç8õÒ¶?áVÔûãŸüöâMþ?ޱÙÿkêº^Ï 
|í2çÈ‘±rFÒØ9^sP=+cþgýO¾9ÿÁÇÿa@ü*ÏúŸ|sÿƒþÂøUŸõ>øçÿý…ð«?ê}ñÏþ?û ?áVÔûãŸüö|ÿ’C¡ÛÇþ”I^^ðKþI…ÿoúQ%zQEQEâþøwá_j>1¿×4¯µÝGâ[ØUþÑ,x@T„`:±üëÚ+ÅüÿöŒá(ÿ„síßð’ÞìþÓò<Ï/+ŒyœíÎïlæ€:øR_?è^ÿÉÛþ9Gü)/‡Ÿô/äíÇÿ£þ-ýHßù)GüZú‘¿òR€8ÿŠ_ |áχ¶­¤èßg¾ƒÉòåûTÏ·tȧ†rŽE{…xÅ/øWð®5oìøE?´ÿsäýƒìþwúäÝ·gÍ÷wg³^á@yÄNñÄé:µ¿Ú,gþÑó"ÞÉ»l(Õ Ž@<õ òÿ‰Øÿð±üý¿öìÏø˜ùßoÙäÿ©M»·ü¿{n3ß¡ÿ Káçý ßù;qÿÇ(ÿ…%ðóþ…ïü¸ÿã”Å ÿ©ÿ%(ÿ‹AÿR7þJPÿ Káçý ßù;qÿÇ+?ᾉ§xsâ?´&ßìö0gyqogÛºcËO$žMhÅ ÿ©ÿ%+?á¿ö?ü,`}‡û3þ%ÞOØ6y?ê_vÝŸ/ÞÝœwÍz…Q@?ü7øoá-Àf§©é>}äþo™'Ú%]Ûeu+ÐÕÕÿžðý¿ònþ.±>ÿ¸ÿ…q¤ÿoÿÂ)ý§ûï;íÿgó¿×>ÝÛþo»·íŠì?âÐÔÿ’•óUòW-m¡ÅÆ}Rš©Êåwk/™íð¤¾нÿ“·ürøR_?è^ÿÉÛþ9^aÿ?ýKßùø¡ÿê^ÿÈ5çmÓ©GúÃÿN$uþ4ø[àßxiõm'Fû=ôvž\¿j™öî¸O ä Šö ù“Sÿ„Wìðÿfÿc}¯ívþ_Ù¼¯3ýrgyéŸÂ¾›¯GŠúÌù\u¶§¯€Æ}n›©Êãgk?—ù…yÄNñÄé:µ¿Ú,gþÑó"ÞÉ»l(Õ Ž@<õ ñÏ_aÿ„«Á_Ú_gû'úw™ö¾_ÜÝÇ\~5½Yû8Jv½•ΚÕ=•9Tµì›·¡ÑÿÂ’øyÿB÷þNÜñÊ?áI|<ÿ¡{ÿ'n?øåy‡üPÿõ/ä?â‡ÿ©{ÿ ×ýµÿN¤|ÿúÃÿN$zü)/‡Ÿô/äíÇÿªÿlmôÍ;Å–qùv¶¾%¼†ÜNÔQQ“ÉÀ­yÇüPÿõ/äï~ùðxƒì¾_Ùÿ·î|¯+6ìqÆ1ÓÙƒÇýfMr8Û¹èeùŸ×$ãìÜmÜõ*çüwÿ$óÄ¿ö ºÿÑM]sþ;ÿ’yâ_û]覯@õÂ< ©ø7C¿¼Ð¼Ë«­>ÞiŸísÎÑ©c€ø$ô­øR_?è^ÿÉÛþ9Xþÿ…Yÿn‡ý£ÿoÛ¿³íþÑö²ùžg–»·çÙÎsÎkcþ-ýHßù)@ü)/‡Ÿô/äíÇÿ®_Æü+áGÁ×ú•öK©"x—áN¡àMJ×ÃVúRjïå}œÁ¤´.1*ØÆ>PÝùé_CׇüRø¥àßü8Õ´'YûEôþO—ÙfMÛfF<²8òkÜ(¯'ø­{¡iþ;ð-׉RÒûCíx ÈsÊsóíÇZõŠòÿ‰ÞáψþÕµk³ØÁý£æK±ŸnèQG <8Ÿÿ —ÀÏùõÐÿðDßüføL¾Ï®‡ÿ‚&ÿã5ÐÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝsÿð™| ÿŸ]ÿMÿÆjÇÂ›Ý Pñߎî¼4&ÿÙÿg@aAˆœ6˜7nzÖÇü.߇Ÿô0ÿä•Çÿ¬ÿ†úÞâ?ˆþ>Õ´›´XÏýå˱“vØ]O ‚9êQE|ñðïÄ¿ tÿi¶¾%·ÒŸWO7í}%¦s™\®\Fsò•ïÇJéÿá2øÿ>ºþ›ÿŒÖÂߊ^ ðçÃ'IÕµŸ³ßAçy‘}–gÛºgaÊ¡‚»ø]¿?èaÿÉ+þ7@ÿü&_?ç×CÿÁñšÃ×µÿ†ú®³á<šš‚øŽÉä6ºi·o+q,Qr7ã>ž•ÞÂíøyÿCþI\ñºåüañ¾.Ô|a¡ê¿kºÄ¶S2}žXð€°'. 
uaùдQEã~5Ôü)¥|l†Ghú{xqR1uhnÍûK… Ø;Cs_Z“þ/Ÿóë¡ÿà‰¿øÍXñ'ŠtoüuŽÿ\¼û%¬ž«ùO&\Ü’ÑOå[ð»~ÐÃÿ’Wün€9ÿøL¾Ï®‡ÿ‚&ÿã4Âeð3þ}t?ü7ÿ®ƒþoÃÏúòJãÿÑÿ ·áçý ?ù%qÿÆèŸÁi¬ntoÏ¥ˆÆŸ'ˆîžÔGÅŒ¦hÛŽ01^™^oðvúßSÓ¼Yg'™kuâ[É¡}¤nF•8<Œ‚:פP_[Éj°(.þs”ÏzûƼ?áoÅ/øsáÆ“¤êÚÏÙï ó¼È¾Ë3íÝ3°åPƒÁƒ]¸ tðUHEJêÞòºéæ»(ó+çYz'ýñÿ֣βôOûãÿ­_VÿÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝzßë-ùóOÿægìWv|»¤ÉjÚÖœ# ¿ípã ã÷xÿ>)x7Ä~}'IÖ~Ñ}=ݧ—ÙfMÛn#cË €O&½‚¼œ~:xÚŠ¤â£eouYuó}Í#U`¯ý¤Š,þ2cgú^r3Ú÷ºòÿ‰ÞáψþÕµk³ØÁý£æK±ŸnèQG <8ÍF«¥R5¿+O]´î6®¬|ÅçYz'ýñÿ֣βôOûãÿ­_VÿÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝ{ÿë-ùóOÿæeìWv|¥çYz'ýñÿÖ¯XøYñWþð•ÍŽ£ô÷Œ³¬v‘)Ù—–eØ?tçgÕávü<ÿ¡‡ÿ$®?øÝs<7áOi~-¾¿Òí5.üGtðÜ´E%1-×kðê9Î2:zšàÇæµ1±Qœ#*·êË…5¦¦‰ñãÁ·Ú<:µïö]óîó,ü©§òðÄcÁÈñÓ8í\¾£ñïÃþ!ðn«¥Ýi÷Ö÷º}Ì#…–v à†9ùGÜ'žkÚ4MNðæ“¤ÛýžÆ Þ\[ÙöîbÇ–$žI<šàüAðËÁ~ð/‰®4ïÚ,ÿÙ“ºÉ6éÚ6Xœ©C!b„œ®:A^Yg/á¯|¶ð®‘©m£B;(RèÉ¢³±” –òŽã»<äæµ?á2øÿ>ºþ›ÿŒ×ŒXxƒK‡N¶ŠK¬:DŠÃËc‚Ïj±ÿ .‘ÿ?ù ÿ½èe8YE7‰Šû¿ù#׎]‡qM×_‡ùž¿ÿ —ÀÏùõÐÿðDßüf°õíᾫ¬øB榠¾#²y ®šmÛÊÜAË\Åxϧ¥yçü$ºGüýÿä7ÿ ±¤jö7þ2ð´V³ùŽ5»F#c<Áê=ëÎßæG ‚£ZŸ<꨾Îßæ_ÿ„Ëàgüúèø"oþ3Gü&_?ç×CÿÁñšòøItùûÿÈoþÂK¤ÏßþCð®¯ì|/ýGðÿäŽìÜ?ý/ÃüÏø%ÿ$‡Bÿ·ý(’½¼ÿà—ü’ þÞ?ô¢Jô ùóÆ (¢€ (®]µÛû?k6÷îÒ-tûI¢É$óo’ITŸ-#,s°ƒ‚vˆÔÿmê+É|â­+Â×¾/´ÖòÞiüI{q[ å D#’§½t^ ñHš+-u;íÄ]¬wSɧMÌÅÕ–(mâšÝ¼ÖgÁ;FT/¿ž¹Õï<'a>»‰¨:6ÿ6 ²î!ã Á“k2ƒÁ$qŒP_ü- ÿÏÝ÷þ ®ÿøÕð´<'ÿ?wßø*»ÿãUØWÌši|1­5ì‹ã»Ÿ¤z²<¦)ãW‘ĉä !rvž]—?Â;¿Š^4Ñi:N¬÷Ö÷ÐyÞd_Ù·/·tÎÕŒƒÁƒ]‡ü- ÿÏÝ÷þ ®ÿøÕvPÿ CÂó÷}ÿ‚«¿þ5\¯ŒüU¥x¦÷š;^\M‰,®$ a(Õ4->ohÚDöÖ÷S•Ô'òˆòÂcŒYqÉІÚEvŸð´<'ÿ?wßø*»ÿãU›ð™üû/ݬr¤7~$»¸€ËF^6U°À‘ê*×Áù­çøQ =­¯Ù£:ó åÖGù?Þ`ÍŽÛ°8ÜPX>7åðˆãÝô»•UQ’I‰°­ê(ÀôiÖ^Ó-nù'‚Ò(ä_ìùÎÖTŒ„ÇQZð–é>·ßø.¸ÿâ+Û+Å“Ý[x7\¸±iò->áàhÆXH#b¤{çÖ±µµ‘ܳ ©ZËúùžWÿ n“ë}ÿ‚ëþ"³5-VßZ×<) „W’ÊÓÃþ5¿Òíu[M]/´xo–êÊáöE†¢xÎ>}ÏÁ`@è»ÙG¾Rž.s‹‹Hš˜ê“‹‹KP¢Š+”ã·ßø.¸ÿâ+sâþ‰á׳´Õo|9ý±®ÝË•§#]I yŽ]>×Q´ÇÔä Èï<9 Xø[Ãöz.š² KT*žcnf$–f'Ô±'ŒxqZ}v§dký£W²þ¾g‚xcãü+ÿhºž¾žê?Í{‡û2s+8Øv±nœ|רèß|7©hö——K¨XÏ4ažÜØ\K°û:FU‡pGPF@<úŠã8m.¢¾²‚îܳCq/Æ[ PÔ4K Ïy›y5 ž}iÜ£I2+7=pIÇLñ^×_"kWW°øÏÅ+oaö„:ÕÙ/çÁó5IòFç^ ±yoÑ6ÿÏOÿ…ñªÿÐÿ•eÿãuŸÿ 
m?¶?µÿáXØÿiÿÏïÛ¢ó¾îß¿ånû¼uéÅy·ÛõOúÿäÊÿ…oÕ?èÿ“+þÏõ‡Ý}ëüÏ_û?ÉSÿ—ÿ"z¥Çí¨Z@ÓÏà‘®2µTã'£¯q¯‹5›»ùt™Òm7ÉŒíÌžz¶>aØW«é~.øÎž7Xïü9wsj÷ØGoçä8ä«ÁÎó’1žk¢”ùÕÿ#ÊÇa–¢‚Miy4ú÷H÷Êá>"üE—À“éA¢ÿiɨùØjìòÂêÙÎÿn•Ëx×ÅÿWF„é~ »Ò'ûBîžÖâE™v·ÊcT$ àîí€;×—x£_ñæ¨Þi2,¶ït–¯<e–à…÷)À‚€ zqrvMœÔaí*FÚZoò='þÆ«ÿB7þU—ÿÔsüq¿º·–ÞãÀ1ͨRHäÕ•ÔŒAÆ+ʾߪÐÿ&Wü(û~©ÿ@ü™_ð®O¬>ëï_æ{ߨÑþJŸø ¿ùÕ`øãko½¿€c†$qǪ"ª(ŒWkðëâ,¾;ŸW‚}û2M;ÉÈûP›˜öUÆ6{õ¯~ߪÐÿ&Wü+Õ?gé%—Vñ{Í“!ûcÜ,½ÅkJ«›µ×ÈãÇeë MMFK[{ɥ׺G¸ÑE¹äž_¢|Hñ—ˆôx5m'áÇÚ,gÝåËý¹ nÚÅO  ŽAŠÐÿ„·âýü¯Ûÿ…yw€>-ßøcÁ:~…~Ý¿™‹í‹~é¾éCŒnÇ^ÕÒÿÂúÕ?èGÿʲÿñºÍÖ¦œ—ÞvG.ÆN*Q¥&Ÿ÷_ù–'Äeäw–4«K¨ó²h5+8Ýr8`™Æ´&ø‡â­3QÒ!×<ýk©jØ-ÇöÄSmyÊ‹“€ íÓ­sð¾µOúÿò¬¿ün±õ‰—¾3×<#§\xsû5#ñ œâo· ·Åví1÷³ŸjZmÙI}â©—âéÅÎt¤’êâíùBQE¡Èpþ$ñ¾³¦xÊ? h~þÚºm<_±þÐKm©æÏß\;÷éÅWÿ„·âýü¯Ûÿ…cø“QÖtÏŽ±Í¡è?ÛWMá oöĶڟi$¾ç8 {ûVÇü%¿ÿè˜å~ßü(ž¥«øÇY·[}Sá¥ô áÖ;­fÖU ‚2)Á#>椱׼o¦YÇgað– KXó²5»hÑrI8P¸$ŸÆ¬Â[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øPÇüYqâí;QšóJþ˺°Ô%°šßí|E9 :g=«Ø(®?Æ^2Ô|9¬hšN“ lßjÞ—Ûßo”ªÇ–RžHéß5ØW—üH»Ôl~#øçIÒÿµ/“ûG˳ûBÁæfçn'ž¸ÇzÐÿ„·âýü¯Ûÿ…gêz‡‹5¿+û[àÕÿ“Ÿ/íz½¤»3Œãrœg§ ­øK~!ÿÑ0ÿÊý¿øQÿ oÄ?ú&ù_·ÿ ŽxöÖÞ+{…QÃH8ã×m•Q@ÀÀqŠÜð?‹.<]§j3^i_ÙwV„¶[ý O‡Œ)o˜:±g§^kþ߈ôL?ò¿oþ_àì×w‹&¼µû%Ôž%¼y­üÁ'”äFY7##®(Ò+?]Ôÿ±<=©êÞOöIn|­ÛwìBÛsƒŒãÁ­ çüwÿ$óÄ¿ö ºÿÑM@½‡Žüw©éÖ×ö <Ë[¨’h_ûv¹§r2ëV?á-ø‡ÿDÃÿ+öÿáXþñ?Žíü¡Ãgðëív±éöé ÇöÜù¨#P¯´Œ®FLÖÇü%¿ÿè˜å~ßü(?LÔXÚƒ# “ŸozöŠ(¢€<ßÇŸ.<âx4[oi<–kvdûh‡h.ÉŒ9û½sߥsð¾5_ú¿ò¬¿ün©|UÒ5kâõµ¶‡¦ý¾ét‘¢óÒ, Ál¹©õÿÄ?ú?ò¥oÿÅVSu÷Q݇§„”/VM?ëÉ%ׯ«›ï#íŸ ¸ò%Yáóµ(ßË‘~ë®cá†NäUø_¯ýßùV_þ7\Ÿü _ÿèPÿÊ•¿ÿGü _ÿèPÿÊ•¿ÿSÍ[±·²Ëÿçãþ¾G¼x'Äÿð˜øBÇ_ûØþÕæ~ãÍó6í‘“ï`g;sÓ½tçÿ¿äè_öñÿ¥W Vç–QEQEóm¯€|Oâ¯x¶ûE¸Ò#¶Ä°½yF0Ãõ¯¤«Åü¡ø«SÔ|c6‡ãì[Uñ-ê5¿ödW;Ÿ*Kîs‘@Ç·½Lá«Iáñ5póö”egÜÄÿ…7ãïùüð×ýýŸÿˆ£þß¿çóÃ_÷öþ"½þ/ˆôSÿòoþ4Â%ñþŠþP-ÿƲúµå;¿·3ùúÏñ¯ÃOxÂ7Ú¦©s¡½œ_˜¶²Jd;¤UÜ u#¿L×ÔUáÿ¼;ã+‡µÎ­ã¿íKò|Ë?ìˆ`ó32ó©ÈÁ ñ×ï^áZBœ`­c‹Š­ŠŸ=is=µìâÿ¬o5ø2ÒÁ [™>ݰÎHN2sŽz^Ñ^UñGK»Ö¼à=>ÃQþι—ûCe×&òñ1ù ä?Ò­Jœ£ Úv¾×"OgV3ìÓÓÈó/øB|]ÿ=ô?ûî_þ&øB|]ÿ=ô?ûî_þ&½;þ‹¿è¢ÿå/þ.øUþ.ÿ¢‹ÿ”H¿øºð~£™w‡ãþGÐoOþ~Oð<ÇþŸÏ}þû—ÿ‰®ëàmæâ?Ú_´ sØw˜ )ÊHF3ÏB+Sþ‹¿è¢ÿå/þ.“áv—w¢øÿÇš}þ£ý£sö~û¯ Cæf'aò@À ~®ÌJ£•w[¥ï}<ŽÃ3–*’ƒ”õùž«EW¬yÈÞð·ˆõZ]ØK¥-´›ö Ú@ü;œ uµÿá ñwü÷Ðÿï¹øšØøyájžÓo,–†kV£9è’ѮާÿO‹¿ç¾‡ÿ}ËÿÄÓ 
ðx«Â—œšs@Úíš(¶g-»~yÜÆý+пáñwýÿùF‹ÿŠ®gÆ^ ñ©ðôW~3’çízÕµ´/ž5¼»l¡•²JóÆGÔb´Ëó%W{X;½”eÆ)ŠÍ*Õ£(9I§Ý«DÑ_Ó<>"ºŽIöó,`¶ÑÀÉç«Ó+íiKš—tyrµÝ‚¼?áoü,øWOöü"ŸÙŸ¾ò~ßö;ýsîݳåûÛ±ŽØ¯p¯øg¥xÊçáö—.“ãìÛó|»Oì˜fòÿzàüìrrryéœW>3CS+&í³zü¯Øq‹“²;¯ø»ÿõ#äÝñwÿêFÿɺ£ý‡ñþŠWþP­ÿÆì?ˆôR¿ò…oþ5笙_üýÿÉeþEûö3|iÿ þ§þßÿ„Sû3ívžwØ>ÑçÇÄ{vïù~öÜç¶kØ+Å<]¥xÊÛÃæ][ÆÿÚV+wkæZdÙþ‘:œŒ:ãíuèàñÔ1u0òºNÛ5¯ÎÝÈ”\]˜W—üHþØÿ…àì°ÿiÿÄÇÉû~ÿ'ýJnݳæû»±Žø¯P¯/ø‘i¨ß|Gð ¶“ªeß?ö—yöuŸËÄ(OÈÜ€G=3žÕÖI¡ÿþ¤oü›£þ.ÿýHßù7Gü"_ÿè§ÿåßühÿ„Kâýÿü [ÿñwÿêFÿɺ¯ðwíŸÙÞ,þÑò>Ýÿ -çÚ>ÏŸ/ÌÄ{¶g¹Î3Î*Çü"_ÿè§ÿåßüj¿ÁØn-ôïCyuö»¨üKx“\yb?5ÀŒ3í.NNLФW?ã¿ù'ž%ÿ°U×þŠjè+Ÿñßü“ÏÿØ*ëÿE5pþÿ…§ÿn‡ýÿoسíþÏöµyž_–»wãØÆqÆkcþ.ÿýHßù7XþðÇŽî<¡Ígñì–²iöï ¿ö$yHcR©¸œ¶O\VÇü"_ÿè§ÿåßühÿ‹¿ÿR7þM×/ãøO?´|ÿ Gü#Ÿaÿ„–ËgögŸæy™lgÌãn7{çÔÂ%ñþŠþP-ÿƹh~*Ó5M®xÇûjÕ¼Kd‹oý™¶×Ëûäà1ïí@ÑEP“ø“þOø^±ÿÂ/ý•öïøF†ÿí?3ËòþÒs/ÙÛíŒÖÇü]ÿú‘¿òn±üI§k:ŸÇXáÐõïì[¥ðÐv¸û\îO´Skœ ’}½ëcþ/ˆôSÿòoþ4Åßÿ©ÿ&èÿ‹¿ÿR7þMÑÿ—Ä?ú)ÿù@·ÿ?áø‡ÿE?ÿ(ÿã@Á/ù$:ý¼éD•èçÿ¿äè_öñÿ¥W PEPEP^/àýÅZž£ã´?ÿbÚ¯‰oQ­ÿ³"¹ÜùR_sœŒ‚=½ëÚ+Åüà¿øHõÞÂMâ=/g‰obòtËÿ"6ÁS¸®Ó–çôÒ€:øD¾!ÿÑOÿÊ¿øÑÿ—Ä?ú)ÿù@·ÿ?áVÔûãŸüö¬ÿ©÷Ç?ø8ÿì(ø¥áßXü8Õ®uoÿjX§“æYÿdC™™NF ޏÇz÷ ðÿŠ^þÄøq«j?ð–ø®ÿÉòѯõ/6ÌÈ¿2íÆr=À¯p ¼¿âE¦£}ñÀ6ÚN©ý—|ÿÚ>]çÙÖ/¡?#prôÎ{W¨W—üHÒ¿¶þ#øNû}õ‡ý£þ“a7•2bo•°qœ`û@ð‰|Cÿ¢Ÿÿ” ñ£þ/ˆôSÿòoþ4¬ÿ©÷Ç?ø8ÿì(ÿ…YÿSïŽðqÿØPÿ—Ä?ú)ÿù@·ÿÏøoi¨ØüGñõ¶­ªj_'öw™yöuƒÌÌ.Gȼ qžõ¡ÿ ³þ§ßÿàãÿ°¬ÿ†úWö'Äißo¾¿ò³¿Òoæóf|Âíó6qœ`(Ô(¢Šð¯†ñn¥ðïJ»Ó/þÙ¶oÙ"ÛÿfÇoµòÄ>å$ž÷ö®³þgýO¾9ÿÁÇÿa\¿Œ<ÿæ£àëÏøI¼Gªoñ-”^N§çƹ,wÚ0Üc>„úÑO/ÂS’œ)E5ÕE_ò9=ÙíQEvxÄVÕ—ã-¹Ñµ_ìÛŸøG×tßgI·'Ú+µ¸àçÚ©yÞ<ÿ¡ãÿ)0V÷Šü/‹~8Åaq¨j6)†ÄÂK „NH¹a‚H<|ÙǨ¡ÿ OKÿ¡¯Å¿ø1_þ7^.;,–&·´\¿8Å¿½¦y˜ªYŒª_[–=¬Ÿèr>w?èxÿÊLyÞ<ÿ¡ãÿ)0W]ÿ OKÿ¡¯Å¿ø1_þ7Gü)=/þ†¿ÿàÅøÝqÿaÔþçþþDçú¾sÿA+ÿ_ä7àyœøÄfê´\Ÿ\ù³l æ>ÈòÛG''½>¼ÛàÕ’éš_ЬY¦Ko]²LÛ¬c,{“Œ“ë^“_IòÅG±ìÅIEs;°¯˜ü'‹WÁÖLñWØlÿyåÛÿgÅ.ÏÞ6~fäääþ5ôå|9§ßj0ØÇ¥ü â8®TrzM丌ãýŸËuï{É5e§TõÔåÇ}cÙ¯«Ï’WÞ×Ó±íw?èxÿÊLyÞ<ÿ¡ãÿ)0WiêÿôÕ?ð-ÿÆí=_þƒz§þ¿øÒÿˆsšv¥ÿ€Ãÿ‘<«fßôÿ’¯ò=7[“Å­aÔüUöë?µÛy–ÿÙñE¿÷é™y8?…} öû?íìïµÁöï+Ïû7˜<Ï/;wíë·3ð<šMÿØ/‡Ûü»Ÿ%eÙû¸óò·##ŸZ©ã‚𯳧[A¤øÃU½Ù.ù ׯžXú ¯ 2G ä1äcÍ|ðúóᬺPøŽæínžáãòã0y 0ÅpíË ž8Q×µÆJíQ¤í)h½^‹ñ:+):rävvv}™ßi|Cÿ¡óÿ)ÿáGö—Ä?ú?ò‘oþã?Úz¿ýõOü ñ£ûOWÿ Þ©ÿoþ5Õþ¦ñüýáþGÏû<Óþ‚þ¿Èöoí/ˆô>å"ßü+¦ø"× 
ø‘ï.>ÑtÞ!º3M°'˜ûcÜÛG''¥|çý§«ÿÐoTÿÀ·ÿ÷ÿÙåü ª¼ŽÒ;kw9f>TY$÷5•\3ËW>:jJZ+[¸ô2øâÔŸÖ*ó®š%ú·\ÿŽÿäžx—þÁW_ú)« ®ÇòO¹ 1Æ«QßȪŠ€€*oøJüUÿCoˆ?ðc/ø×ÐÆ3 ÅI%gæeí }ÿ—Ä?ú)ÿù@·ÿåüa¡ø«LÔ|6¹ãí«Vñ-’-¿ödVÛ_,CîC“€Ç¿µx×ü%~*ÿ¡·Äø1—üjî­ëZ—¼-¥®j—ð®³hËÝÛÊ¡¼À7ÇÁ#>æ³Äðî;JUª%e¾ «E»#ì (¢¼#SÉüI§k:ŸÇXáÐõïì[¥ðÐv¸û\îO´Skœ ’}½ëcþ/ˆôSÿòoþ5âMþ?ޱÙÿkêº^Ï |í2çÈ‘±rFÒØ9^sP=+cþgýO¾9ÿÁÇÿa@ü"_ÿè§ÿåßühÿ„Kâýÿü [ÿð«?ê}ñÏþ?û ?áVÔûãŸüö|ÿ’C¡ÛÇþ”I^^ðKþI…ÿoúQ%zQEQEàz †ƒu¬ø½õOˆ:—‡g#½ ik¬¥¢ºî9F$œŒÿ²j÷ÊùÊÇþoøH<[ý·ýöŸøH/6ý·Êß³xÆ7sŒçõ­)Þ\·±­~Ò\·±Öÿcø?þ‹>¹ÿ…D?áGö?ƒÿè³ëŸøTCþ‡ÿûþ¥ü—£þ-÷ýKù/]Tþò:¾£ýôTø‰¦ønßÀš”¶µ]jé|­–ëÑܤ¿½Læ02Øolgµ}_1xãþïøCïÿ²¿°¾Ûû¿/ì¾O™þ±s¼ôÏášúv°«OÙ»^ç5j^Ê\·¸W“üV†ÎãÇ~ŠÿZŸEµoí ÷ð] g‹÷IŒHx\œ/¾qÞ½b¼¿âGö?ü,ÿoý‡û3þ&>wÛöy?êSníÿ/ÞÛŒ÷Ådbgÿcø?þ‹>¹ÿ…D?áGö?ƒÿè³ëŸøTCþÐÅ ÿ©ÿ%(ÿ‹AÿR7þJP?ýàÿú,úçþÿ…XøS ¿Žüw†µ>µj¿Ùû/çº//îŸ9pØ9_lcµlÅ ÿ©ÿ%+?á¿ö?ü,`}‡û3þ%ÞOØ6y?ê_vÝŸ/ÞÝœwÍz…Q@<|;Ó|7qàM6[ÿ‰Ú®‹tÞnû5èí’/Þ¾1\Œ7¾sÞºìÿÑg×?ð¨‡ü+?áoü+øWOöÿü"ŸÚ¾ó¾ßö;ýsíÝ¿æû»qžØ®Ãþ-ýHßù)@ÿö?ƒÿè³ëŸøTCþ‡¯Xh6ºÏ„Køƒ©xŠsâ; Ö—ZÊ]ª.ãó„QAÀÏûDw®óþ-ýHßù)\¿Œ?áþÑðwü"ÿðŽ}»þ[-ÿÙžG™å峟/¹ÛíœP´QEã~5¶Ón¾6Bš§‰îü;ðâ•»µ¿[FvûK|…Û‚ÉÇû ö©?±üÿEŸ\ÿ¢ð«$ÿ„oþ¬ð”e}‡þ¡³ûOËòüÏ´œcÌãv7{ã5±ÿƒþ¤oü” ûÁÿôYõÏü*!ÿ ?±üÿEŸ\ÿ¢ð®ƒþ-ýHßù)GüZú‘¿òR€)üHcѼN–÷²_@¾#ºÝÉ(•§]±áËŽ°ç=óšôÊóƒ¿cþÎñgöw‘öøIo>Ïö|y~^#Û³mÆ1Ž1^‘@|Åàü<Ö|a®øÅ´ýJ_3ε¼[1#ùK ¨Ÿ\×Óµáÿ á\¸Ò·ÿáþÓý÷öÿ³ùßëŸníÿ7ÝÛŒöÅ\*Nðvô \§ÿ Ëá/ýÿÁÕ¯ÿGü+/„¿ôP_ÿV¿üEwŸñh?êFÿÉJ?âÐÔÿ’•§Ö«ÿ;ûعWc̵¿ü<Ѭ#¿Ð¼bÚ†¥Ý·“ju;ywæt ò"†8RO™¯£kÇüiÿ ãþ§þÀÿ„SûOívžOØ>ÏçÇÄ{¶ìù¾îìã¶kØ+9ÔœÝæïê4¬á´4PO¨øF;™Œ1¶n0\qsï^é^_ñ#ûþ?€·þÃý™ÿ;íû<Ÿõ)·vÿ—ïmÆ{â)û9Æv½šeA¨É6¯ä|ÿý•¢Ðeÿð)?Âì­þƒ/ÿIþô‡üZú‘¿òRø´õ#ä¥{Û_ôí}ïüÎß­ÐÿŸ1>oþÊÑ?è2ÿøŸá^íû=¢GàY"}ñ®³0WÎw.,÷­¯ø´õ#ä¥Wø;ö?ìïgyaÿ„–óìÿgÇ—åâ=»1ÆÜcãÇŒÇ}f)rZÞmþf5ëS¨’…5CÒ+Ÿñßü“ÏÿØ*ëÿE5tÏøïþIç‰ìuÿ¢š¼ó˜ù&ÓLÒ$³åÕ$hÔ²}¥ÓŽF1ÅMý•¢Ðeÿð)?½Aÿ„þÝ3íðŽý§ì‘yÞw‘¿~Á»vyÎsœÖ‡ü[ïú–?ò^¾šëE.U÷¿ó=ÈB*¼"y_öV‰ÿA—ÿÀ¤ÿ µ¢Ã¤iž.ðÝÚjªÉ­jÒ´·V4XúާŠô¯ø·ßõ,ä½ck¶>Ôõ Øiñèò}«[µ†á,š0í ÌÈý+,V3žŒ£ÊµóæExQTŸ,"Ÿ–ç®Øü`ð¡y¬>#$|àÏ À'—u :w<ôëW5/‰ÞÒ­Ö{鮌ᵘ\6pO+âqŽž¢¹ý/àWƒt¯.ª‘ÝÜ";²ØÝ´s[áJ²d…Ï$äI­O|%ðˆôèìÿ²àÒöJ%ó´Ëx`‘°Ú[aÊóœzé_ÒÆrí¬+–À!@À>®V?†üS£x»N’ÿC¼û]¬r˜Yü§$aÀ=~u±@sÿð‚x?þ…MÿÐÿñ5ÐWŸÿÂíøyÿCþI\ñºÜŸÁ~ µ·–âãÃ^†¼’Ia 
ª($’¸s^Aàˆ|5â/‹†[‹½…þ$¶zjZÛ…·Äÿ"2©`gò—ÌoâØ`Åwóüfømuo-½Æ·ÐJ…$ŽK Ù]HÁðAb¹»|Ó<[ˆì5-.£´6© d‘¹bLD9JîÏÝ8 JÁŠE’? èˆèC+.Ÿ Ž„µ½\¼sq¼:þéepˆ¿cœd“€9Jï(¬ýOBÑõ¿+û[J±¿òsåý®Ý%ÙœgÆp:z ЮÄþ6ðËý¿¨}í[üŸÜÉ&í¸Ý÷ã—¯­ð‚x?þ…MÿÐÿñ5æ¼/y`ú:x#ÂkåîüÞ`*2ä´dø< œ ë?ávü<ÿ¡‡ÿ$®?øÝrþ'ñ·Ã?j6·çÇzæ—uo¯¦˜w#H ÄGU1œ çjà øuᯠkŸô}NóFÐõ«ˆ‹Íqý ?>æ ›BòS s·=ë¼Ót7F·k}/O´±œ»Gk Ä¥°HPpϰ¯7ðçÄï…¾ðýž‹¦ëò KT*že­Ã3K3åõ,IãžWyá¿èÞ.Ó¤¿Ðï>×k¦)ãÀ pFlTsÁ Õ¼¶÷G4¡I#‘C+© ƒÁqŠ’«ßß[éšuÍýäž]­¬O4Ï´¨ –8œzP?ü žÿ¡SCÿÁt?üMfø‡ÂþÑ|3ªê±øCA‘ì¬æ¹Tm> ¢ü½ñTávü<ÿ¡‡ÿ$®?øÝCwñ‹á¥ý”öwZâËoÉ8Üì@Q’˜$u âŠ(  ½KÃZ³p·¦‰¦ßN¨dºµIX.IÀ, ÆI8÷5OþOÿЩ¡ÿàºþ&«ø“â'…|#¨Ça®j¿dº’!2§Ùå“(I厪*Çÿ…Ûðóþ†ü’¸ÿãtÏüJð~‹¡Ù¶»ö‡ é°Ä#û"ønÒéç›,FÂàÄ`mè6’H#¬ðçíÏÃöpkÚ'‡õ QP››˜ô¨#VbIÀÀ.p3ŒàgÍëÿ~xû/íž%?³uµ|›I†é#ÎÐÙˆåy9Þ¶?ávü<ÿ¡‡ÿ$®?øÝw6zeœvvZZÇÁ$“… ’OãV+?DÖôïèðjÚMÇÚ,gÝå˱“vÖ*x`äÈ­ (¢Š(¢Šãï¾x'R×$Önô%¾’Q4Œd#¸ÁËFaÉ96Ns“_8êw0é^+ñ%¾?‘±t±¥¬ËE@PÀ=1__WÉz£]ø«ìúV£vŸÛwy’ÚÜÈ ù‡‚Gñω—-;ž¦QrÄ¥R|ªÏ]Ìÿí±ÿ@ÝGþüõèþÛô ÔïÇÿ^­nÔ¿è]×?ð ¨Ý©л®àW›õ%÷ŸWìpôøÚΩö&x¾Ã{í¿<±mQóÉÍ}§_kÍ|t[;EÕm£ùs,ö¬ˆ¿0êOOJû.½ $ùàß™òùÔiG•)ó«okuzxwí?ÙµoKåI.ß¶ü‘.æ?,C^ã^#ñôÊ5¯ù6ÓÜÉþ›ˆ Bîß,]ëë[Ôvƒ~GE⠤쮵í®ç”ÿmúê?÷ãÿ¯GöØÿ n£ÿ~?úõkv¥ÿBî¹ÿ€MFíKþ…Ýsÿš¼¬y/¼ûcƒÿ ŸÀ«ý¶?è¨ÿßþ½zŸìý?ÚuoËåIï±|’®Ö,£‘^m»Rÿ¡w\ÿÀ&«žø‹¨|>¹ñ$ñøvyd¼û<{®·D-¤ !Më´çvI •$)Áî:°•yæÕºw<|êž8téVçwÚÖèõ>®¢¼oÁ_u-gFšãTð–³}:Ü2,šœÒÀjœ1g'~I$z¬ý¡5è¼Uw`þŽæ´<¶j¯ØmøEq—ñÁP¿xñÓÐ>`ËÚ7…ü¦h×zF¹q=¿›ºK[dxÛt®Ã¸'†§\×Uÿ ÷Ãßôñ/þÇÿÇ+´¾-¿“¢ê·1üØ– Vto˜ô#¯¥hîÔ¿è]×?ð «Îž6Q“–žgÔaòŒ¶¥Nx†›Iµm¶=“þèâ_üÿŽW?â?‰ÚW5éÖ:f¯k,^#²œ½ì ˆ@b¸9çæ»ó½Ú—ý ºçþ5M¥µÙñ…~Ñ¥j6‰ý·i‰.mÌjO˜8÷ÿN–2Sš‹K_21yV]J„ªR¯Í$´VÜúÒŠ(¯@ù£Çüaâû/üo‹Q¾´¾ºŠ_YFÁ7,Ù °ãå=ûеÿ ÷Ãßôñ/þÇÿÇ+øÐgm>Ïeuvÿ؉˜í¢20|œ;ˆ®+v¥ÿBî¹ÿ€M\5ñR§>T—Þ{ùv[ÄPö•ërÊûXöOø_¾ÿ ‰ð?þ9Gü/ßÐÄ¿øÿ¯Ý©л®àQ»Rÿ¡w\ÿÀ&¬~¿>Ëï;ÿ±r¿ú qíÿ5õm#ÅŒ1ËW~#»Q‡PëÀƒÏ<×¥×”|Ýÿvµ¾7‰ÿ¶çÝ‹µ”ùqpGb+ÕëÓ‹ºLùJ‘Q›QwI…xÂßÿb|8ÒtïøD¼Wäùßé6o› ægo•· ã8>à׸W‡ü-ñÿö'Ã'Nÿ„KÅwþOþ“a¦ù°¾fvù[pÎ3ƒî 2ÃþŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þÎøZõ!xçÿÿýð´ÿêBñÏþ ÿû:çüiãÿí¿ >ÿ—Šì<ë»Oô›ý7Ê…1q|͸ã8À÷"½‚¼Æž?þÛðÓéßð‰x®Ãλ´ÿI¿Ó|¨S·ÌÛŽ3Œr+Ø(¯/ø‘ªÿb|Gð£ö ëÿ'ûGýÂ6gÌ(¿*ägÉö½B¼¿âF«ý‰ñÀ:Ø/¯üŸíôk|ÙŸ0¢ü«‘œg'ØÐÿ…§ÿRŽðOÿÙÑÿ Oþ¤/ÿàŸÿ³£þŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þ΀øZõ!xçÿÿýWø;uöí;Å—ŸgžßÏñ-ä¾MÂl’=Â3µ×³ 
àŽÆ¬ÂÓÿ© Ç?ø'ÿìê¿ÁÛ¯·iÞ,¼û<öþ‰o%ònd‘˜gv4éÏøïþIç‰ìuÿ¢šº çüwÿ$óÄ¿ö ºÿÑM@3é–ZëéVm…|A Ðìÿá ñ•Ç‘§ÛÅç[é[ã“lj7#nåN2q[ð´ÿêBñÏþ ÿû:ËØÀôgˆJÊßqâ?`ñý þ%ÿÁd”ëmVøU¯´_Oˆë–€K{fÑ!>`ùA=úœ{öÏøZõ!xçÿÿýrþ0ñ§ü$zƒ¬ÿáñ—³Ä¶RùÚ‡‘`°ÚqËsœzéB£˜W© lÏh¢Š+S„ùûã 7“|X´[6ûP”hhLVP\>O˜Û Ï¸®Kì!ÿ¡?Ä¿ø,’½[Äšÿü#Ÿc¼þÈÕuMþy:e·Ÿ"æäÅr0¼c>¤zÖÇü-?ú¼sÿ‚þγ•(ÉÝ”qըÒ#öÿПâ_üIGØÇ÷^eÏöœ2ìýâãå^NN¹¯£«Ãþ)x‹ÆWß5km[ÀŸÙv/äù—ŸÚðÏåâd#äQ“’ã¦sڽ»°ØZ8X8QV[õýNzµ§Z\Ówa^Kñhj'ÇNŸý¡}þŸåÛyËÿݦ~fà`dóéŠõªòÿ‰zÄÜé:_ö¥òhùvhX<Ì€üíÀÀ$ó×ïZÔ„jAÂ[=“Œ”–èÆòþ!ÿÑ?ÿÊͽ_Ä?ú'ÿùY·®§þ߈ôL?ò¿oþÂ[ñþ‰‡þWíÿ¼Ïì<üûüeþg_öŽ'ù¿þG-åüCÿ¢ÿ•›z¿ð”j#Ç9¶ŸýŸ}þæ[yË.Ïݾ>eàä`ñëŠÚÿ„·âýü¯Ûÿ…gü7»Ôo¾#øúçVÒÿ²ïŸû;̳ûBÏåâç^@Ž™ÇjèÃeØl,ÜèÆÏmßê̪â«V,Ý×Èô‹[ ;?ìv[ùò´óy1„ó$o¼íެp2O&‹[ ;?ìv[ùò´óy1„ó$o¼íެp2O&¬Q]Ç9ó_€¦ñjx+O]3Â_o³g—sý¥[ÿxÙù[‘ƒ‘øWIöÐ…ÿ•ˆ+áÿµí#Àúu—„¾ßo›²çûI"ß™Ÿ”©#‘øWMÿ #Åô!ÿåb/þ&¿>ÆÃñ59hÓk™êçg¿Uí¿%èn³º׳xˆ¦´µã¥ºþÑãÏú¿ò±dê²ø•õßgÃ?ÙvßðÙí›íñϹ÷.Õäq“ŸozÞÿ…‘âúÿò±ÿXÚÏ‹u{[ð…®£áŸì¸SÄVr ¾Þ“î`ÄmÚª1Á'>Þõ®[ BÅÓr£¯ºßÉ{Gù1K9¡Y{8×Ro¥ã©ô Q_xbx¯ßVOHþÔ¹ÿ„ywCö”ƒj}¡²Û›ƒÎ=ýª´xóþ„/ü¬AV|u­ßè?¡ºÓ´íIŸÃ«‡í+Õ7 wn`sÈþÔŸð²ceá/·ÛÅæì¹þÒH·æF'å*HÁ$~Á›bq8z*xe+ÛÞi+Y÷k_™2­F’æ¯%Ý»jz?ü'^<ÿ¢iÿ•Ø?øš?á:ñçýOü®ÁÿÄ×=ÿ #Åô!ÿåb/þ&øY(ÿ¡ÿ+ñ5óÿÛÏòSÿÀ—ÿ&G×òïùýü ñŠü[©xq­5?fYÉwiæ]ÿkE7—‹ˆÈùdä€=³šöZðøÛ^Õô•±½ð—Ø-廵ßsý¤’ìÄñ‘ò…ä€?÷êú §‰ÄQsĨ©^ÞëMZ˳züËj5W5 ).éßP¯.ø—}¦üDðÞ™¦iÞGý¡åÚy뙘¸Ÿ|b½F¼—âÖ¥u¤xãÀ·ÖVo¸‹íû-¼áüÆ€üÄ0 ?…vâg8QœéÛ™&Õö½´¾Ú|ÐÜ£Í7d·~FŸü'^<ÿ¢iÿ•Ø?øš?á:ñçýOü®ÁÿÄ×=ÿ #Åô!ÿåb/þ&øY(ÿ¡ÿ+ñ5ò_ÛÏòSÿÀ—ÿ&gõü»þGÿGCÿ ×?èšåvþ&›ðnyî´ÏÜ][}–æ_ݼ¶þ`)ÈŒ”Ü8lŒŽ¸¬øY(ÿ¡ÿ+ñ5·ðNæ[ÝÄ×SÛýži¼Eu#ü?–Åc%w¸'ï^¾QŽÇbg%ŠŒRKNVŸå)F®”*)?&™éµÏøïþIç‰ìuÿ¢šº çüwÿ$óÄ¿ö ºÿÑM^égá?øîßÁº6¾×kŸnÜmÁš‚5 ûHÊä`àôÍlÂ[ñþ‰‡þWíÿ±ü'âÛø7C†Ïá×ÚícÓíÒí¸#óPF¡_i\Œ™­øK~!ÿÑ0ÿÊý¿øPÿ oÄ?ú&ù_·ÿ åüa®x«SÔ|¹àïì[Uñ-“­ÇöœW;Ÿ,mA‘IÏ·½uð–üCÿ¢aÿ•ûð®_Æ犵=GÁÐëžþŵ_Ù:ÜiÅs¹òÀ&Ôœû{дQEäþ$Ôu3ã¬shzöÕÓxh#[ý±-¶§ÚI/¹ÆþÕ±ÿ oÄ?ú&ù_·ÿ Çñ&£¬éŸc›Cж®›ÃAßí‰mµ>ÒI}Î0p@÷ö­øK~!ÿÑ0ÿÊý¿øPÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…¿äè_öñÿ¥W WŸüÿ’C¡ÛÇþ”I^@Q@Q@x¿ƒÿá<þÑñü"ÿðŽ}‡þ[ÝÿÚ~™æes/¸Ûïœ×´W‹ø?CñV§¨øÆmÆ?ض«â[ÔkìÈ®w>T—Üç# ozê?âïÿÔÿ“tÅßÿ©ÿ&èÿ„Kâýÿü [ÿð‰|Cÿ¢Ÿÿ” ñ ?â—ü,øW·öÿü"ŸÙŸ¹ó¾Áö;ýrmÛ¿åûÛsžÙ¯p¯ø¥áßXü8Õ®uoÿjX§“æYÿdC™™NF ޏÇz÷ 
+Ëþ$lÂÇðöØ´ÿâcäý¿“þ¥7nÙó}ÝØÇ|W¨W—üH´Ôo¾#øÛIÕ?²ïŸûG˼û:Ïåâ'än@#ž™ÏjÐÿ‹¿ÿR7þMÑÿþ¤oü›£þ/ˆôSÿòoþ4Â%ñþŠþP-ÿÆ€ø»ÿõ#äÝgü7þØÿ…ãïíÿ°ÿiÿÄ»Îûÿ'ýKíÛ¿æû»sžù­øD¾!ÿÑOÿÊ¿øÖÃ{MFÇâ?­µmSûRù?³¼Ëϳ¬far>Eà`8ëŒ÷ P¢Š(ç/‡ÞñΧà}:óG—ëa'›å ÆœKÄŽí£AÆ;bºoøA>&Ï ßw?üMVø[áß_|8ÒntŸÿeØ¿åÙÿdC?—‰œŽNH'ž™Çjì?áø‡ÿE?ÿ(ÿã\rË𳓔 ®Ï>yVrs•4ÛÔæ?áø™ÿ=ü%ÿ}Üÿñ5­xkÅú6·á K¡µ£xŠÍX4¥÷î$gx ïœW Â%ñþŠþP-ÿƹh~*Ó5M®xÇûjÕ¼Kd‹oý™¶×Ëûäà1ïíN 59) 4:yf”ÔáM&h¢Š+¬ïÐÀã`'9+íŒÒ ñ3þ{øKþû¹ÿâjÿ‰4ígSøë:½ý‹t¾×cKÉö’ ms’AÏ·½lÂ%ñþŠþP-ÿƹjà°õ¥ÏR ³Ž¾_…¯>z°MœÇü Ÿ?ç¿„¿ï»Ÿþ&øA>&Ï ßw?üMtÿð‰|Cÿ¢Ÿÿ” ñ£þ/ˆôSÿòoþ5Ÿönþ}£ìŒüúE/‚\Úèž&·½0›¸¼Et“s°¸XÃmÏ8Îqžq^›^oðv‹};ÅÞ]}®ê?Þ$×XÍp# ûG ““Ó5éÚ’JÈôb”UWÆZŒõ'F·²´†Á ‹vÓ*¹nX“œ0I¯³kâm+Aš÷L†áoü¥}ØO$60HëŸjôrÜUÒÅSsŠW²iku®­wdÏ.y‚öJ VÖÏîýMßøXž!ÿŸ}/þø“ÿŠ£þ'ˆçßKÿ¾$ÿâ«;þ{ú ÿä¸ÿ?á¸ÿ ŸþKñ¯kýWÉ?è|ù3?õJ_óâ?zÿ3@xÏYÕ®-,®á°X%»ƒq‰\7©ËÔ úö¾6·Ðf²¿²¸kÿ5Rî §’9‘G\û×Ù5âæX .ª¥…¦á¯fÓÖï]ì!—<½{'ëe÷~^!ñ÷Q¸ÒuŸÞÚ,M<_mÚ%¯+9Á¡5íõ៴-«^ê~·Y|¦¶aöîÆG§á\”)BµXÒ¨¯4š]Sz¯™N¶^Ê׿ÒÞºmÿ Ä?óï¥ÿßñTÂÄñüûé÷ÄŸüUgÂ/qÿA?ü—ãGü"÷ôÿÉqþ5ô¿ê¾Iÿ@“ûãÿɘÿªRÿŸû×ùš?ð±ú_ýñ'ÿ^ËðâKÏk73Í­Ï#„û ˜â'í^ÿ½Çýÿò\tð¯ÄMOÂú„ÞÖ¤ŠÒÏQ–6´·¼kif”ˆÃ7R6íûÏÆ'Ÿ72ÊpFXJ2ƒ{ݧ¹²á’¼¿ßtÔo¦‡ÕµÏøïþIç‰ìuÿ¢š¸ xGâzørÐ_øú}:ëçßk>Ÿ ëÇó¶31r_#¯Çjó‰|#ñŠßIÕ®µmRú }>âK{ªùé,~Y>X©ld1œƒŠñÍhß¼]¤èz~›mc¡µ½¥´pDÒE)b¨¡A8 àsWáx×þþÿ¿3ñÊàlü9<öPL5‚HÕ‚ùã#8ÎjoøEî?è'ÿ’ãükíiex B-Щ{.±ÿ䎥•c䮢­êŽãþ÷èáÿûó7ÿªçâˆ|kâ 麵®—¼ZõœêÖ‘È­¸>Ðç#cÛÒ¸ÿøEî?è'ÿ’ãüjî£K§xÛÂÓ=眳h»| ¸ýà9Î}« v]‚§‡œéÑšinÜmøI±O-ÆÓ‹H«-õGØQE|‰ÌxGÅ?êž ø½m©i0ÙËq.‚°2Ý«2í7Ĭr£¿­`ÿÂþñ¯ýü?ÿ~fÿã•c㕃ê?,áIü’4dmÛ7g÷Ò cñ¯?ÿ„^ãþ‚ù.?ƾŸ*Àa+áÔêҜީÅ/ÆI›ÓÀbëÇžŠºõGqÿ ûÆ¿ôðÿýù›ÿŽQÿ ûÆ¿ôðÿýù›ÿŽWÿ½Çýÿò\ð‹ÜÐOÿ%Çø×£ý“—ÿÏŠŸ|?ù#OìœÃùWÞ¿Ìú7à—ü’ þÞ?ô¢Jô óÿ‚_òHt/ûxÿÒ‰+Ð+áÎ@¢Š(¢ŠŒO \=ºË]ã 7*±!I@%Xßiô  +çí¼T5¿ÿaø£û*×þ+ÍÐgÅ>çÜ2ÛŸ‘Æ=½ëßgžn.%Ž"BòI#TP2I'€ç5åkð»WšûQÔô?Goe«^I¨ªG¦G2þôî9˜cŽ^õ¥7/}]ÑtÔ¯Q]ûþ!ÿÑAÿÊ5½þ!ÿÑAÿÊ5½j°ñwý_ü¢EÿÅ×gy¨ÞkpØ'µu¶¸¿:|¡ðÜ g,Ù!Bʲœî#Ž;äàdŽŽ|7ò¿ëæu{Lò?ëæIñ¼d|¨[ÆÚ?ºó-³!‹ï2ò0pxôÅ}^A©üñ±§Ëaãÿ:Ú\oOìh×8 ŽCƒÔ õú«¦ßîÕ‘ÍYÓrýÒ² ò_‹'Q8ð)Òuìûïôÿ.ëÉYv~í3ò·##Ÿ\×­Wã¯Þx»Pѯì5ßì›/ÏØÿdîóBƒÁ`Ÿ^½±Q]slg ).mŽ3Ä?ú(?ùF·£Ä?ú(?ùF·­OøV.ÿ¢‹ÿ”H¿øºÇñ7…uß èÒjšŸÄiü„È ‡’Ff [6BžX…È®®|7ò¿ëævûLò?ëæI¿âýü£[Õï„ÇQ>8ñÑÕµí ïô2ëÉX·þíñò¯LÕ] 
À~.Öü=¦jßðŸù?n´ŠçÊþƉ¶o@Ûs¸gÆp+¯ð/o<#¨k7÷úïöµÎ©äo²6ùA€à1!‡§Nù¬êÊ‹_»V×™iPqýÔZ×™ÚÑEÎsð·Ã¾2¾øq¤Üé>;þ˱;˳þȆ/8?;œO=3ŽÕØÂ%ñþŠþP-ÿƳôO†þ2ðæ“¤üGû=Œ¼¸¿°á}»˜±å˜“É'“Zð‰|Cÿ¢Ÿÿ” ñ þ/ˆôSÿòoþ5ËøÃCñV™¨ø:msÆ?ÛV­â[$[ìÈ­¶¾X‡Ü‡'j§£kþ-Ö¼EŒž>ÖmïÎûÍç„àŠ¿/–òÉlôÉä t8$ÖMðóÅZž£¤M®x÷ûF×MÔ!¿[ìx¡Üñœ™# ‘߯JôŠ(¢€<ŸÄšv³©üuŽ^þź_ k±¥ÎäûI6¹ÀÉ çÛÞ¶?áø‡ÿE?ÿ(ÿãVASàì7úw‹!¼ºû]Ô~%¼I®<±šàFöŽ''¦kÒ+—ð?„î<#§j0Þj¿Ú—Wú„·ó\}œA—(o”:©IþÐö¾"º¦Ù³Ì*±©m¼ã8Î;TÓ¼{â_Ùë6ÿní »BñÇwáËd“nH€Ä`¸ò=ë¬ð?„î<#§j0Þj¿Ú—Wú„·ó\}œA—(o”:©èR¤ðm…ö•à­OÔÄbòÚÊ(eT\* mûÌ H8$r¾EÖ´«+ïø¦[˜w¸Ö®ÔÌ8ó¡÷©œ”UÙ¶„«Ï’;Ÿ]W‘Øü1ñ5Æ¡®­¦Ëá=XMRÑä¾Ú–a#s7Íœóž˜AâÿðŽé_óëÿ‘ühÿ„wJÿŸ_üˆßãY}b'wöE~ëñÿ#ìJ+âÍgFÓí4™ç‚ßd‹·{e€îk껈¾ÔüGa­Áw©I‰¼ˆØBç°áAþ.ØëÅi ©«£†–\“õÐê(¬x§FðŽþ¹yöKY%«ùO&\‚@Âz)ü«Ã¾0x‹Ã¾6ºð‹éZ„öÿéžb#tâ3ð´¶Ðìs­Z);˜ñæSíDkFNÈUrÚ´ ç&¬½ÈúêŠ(­8(¯'ñ'…´o|uŽÃ\³û]¬~*y¯\PƒÑç[ð¤¾нÿ“·ür€7'‹Î·}’G¸¹³ äƼïß ¦ð×Ä=fß]Ô®t»M0ÚÇÝÙywdùNà¹áÀ=ªçü)/‡Ÿô/äíÇÿ£þ—ÃÏú¿òvãÿŽP Q^?ãO…¾ ð熟VÒto³ßAwiåËö©ŸnëˆÔðÎAà‘ȯ` Š+Ëþ$hšwˆþ#øIÕ­þÑc?ö™öMÛaF©ràШWãMÄÚž©cu¡Ãየ!·š!ÖíYÈ2`1WPN ‚¥x';²6Óÿ…%ðóþ…ïü¸ÿã”Â’øyÿB÷þNÜñÊÜð†¦ð‡‚4Í ââ;‰í‘Ì’F]Îìä ò@-Œñœg8®’¼ÿþ—ÃÏú¿òvãÿŽUƒ¶6úfâË 8ü»[_ÞC n'j(Œ(ÉäàÖ€="Š+Ÿñßü“ÏÿØ*ëÿE5tGZÓZе *I4½¶’ÙFJ‡R¤¦kä+MšÂÚY-²ï³íÉ {ÔÿðŽé_óëÿ‘ük^(ôã•V’M5øÿ‘ïžð?ˆ4¯¿ˆOO­pŸðŽé_óëÿ‘ük)ÕŒ]™Ý‡Ëêׇ&ñd^!ŽÍ®Î½xÈg²3›øä)ã!¸®KâígÀ·~¼ƒFŠÅuÙå¬ mé8mƒg½}]^Æ_Vu)78É;ý­ú~ÏæÕ©Ö¬¥N1Š·ÙÛwøžg©|ðõºÅocw§¸pÆ[[·f#å>fñŽsÓ<zçË>&ü8ðç.|;oh÷2Gz÷Mq5äÃ$*DZ~P I=2wrO}?^7ñ²ãL´ñG‚§ÖV&Ó×íÞh–/1yHÀÊàçœv®ªòq¥&“m'¶ÿ/3‹ %Д’i5¾ÛõòîxŸØ<;ýûü ?üU`ðï÷íÿð$ÿñUÞÿÂEðÇþxiŸø+oþ7Gü$_ 熙ÿ‚¶ÿãuó¿]ÄϪŸs>·ûC ÿ>hýÈà¾ÁáßïÛÿàIÿâ«×?g˜í¢Ô¼\–…Lì{v¶áÒ\óõÍ`ÂEðÇþxiŸø+oþ7]Á;2ïÅ5ŸFX—Oo°ùB(¼µá$ ŽsÚ»òüEZ•Zœ&•¾ÖÝ?ÌͱTkQQ§Nwû6¾Ïð=’Š(¯`ùóâM.ÓE—M‰îÚ9ÎíÓ•=N8Ϧ*ߨ<;ýûü ?üUt^Ö| iá{85˜¬[P]þi–ÀÈÜ»–ØsÆ;Ößü$_ 熙ÿ‚¶ÿãuóµñuãVIS¨Òodíòò>· ŽÃFŒ#*T›In•öëçÜà¾ÁáßïÛÿàIÿâªÿ‡í´¨|máv±hŒ§Y´d»Žß0vÉö®»þ/†?óÃLÿÁ[ñºÌÕhýÈîgÅ<«¬8ò†³0L¾\Xæ½j¼·àT–³xg_–Ä(´}zá ›F‘mÀãâ½J¾–ñMŸQ§6×p¯ž>økáN¡àM6ëÄ·Rjïæý O«4.1+…Ê >P½¹ë_C×Ê>Ö| iá{85˜¬[P]þi–ÀÈÜ»–ØsÆ;×>/,<ã=meù—B’«.W%SÓ¿á øÿ?Zþ[ÿQÿoÀÏùúÐÿðzßüz¸ÏøH¾ÿÏ 3ÿmÿÆèÿ„‹áüðÓ?ðVßün¼ïíz¿ô?¹ÿ‘ÕõÏØýæß‰ü5ð§OÑE׆®4§ÕÒî×ìâ Y¦s™ã „2ü¥»qÖ½â¾aÔuŸÝÁ 4V+¨5Ý¿”b°1·!8mƒg½}=^Ž,Då mgùœµé*RåRRô 
òŠÖZ¡ã¿Úø•àM!ÿ´>ÐgœÂƒ!\¸#0^üô¯X¯'ø­{¡iþ;ð-׉RÒûCíx ÈsÊsóíÇZê1+ÿÂð3þ~´?ü·ÿ£þߟóõ¡ÿàõ¿øõð™| ÿŸ]ÿMÿÆhÿ„Ëàgüúèø"oþ3@ü!¿?çëCÿÁëñêÔø- ¶âx4³ÓãñÒZ˜äÞ¦ ±„ÃdîqÎNk/þ/Ÿóë¡ÿà‰¿øÍj|šÆçFñ<úXŒiòxŽéíDqìQXÊap6¸ãé•ÏøïþIç‰ìuÿ¢šº çüwÿ$óÄ¿ö ºÿÑM@Wᯠü¹ð®‘>©s£ BK(^èI­20” /•óFÓ»<`bµ?á øÿ?Zþ[ÿV_†¼YðjÛºD¥¶Œuì¡K£&ŠÎÆP€>[Ê;Žìó“šÔÿ„Ëàgüúèø"oþ3@ü!¿?çëCÿÁëñêÃ×´†úV³á ü6šúƒxŽÉ$º‘¸o+q'*]°7çžµ¹ÿ —ÀÏùõÐÿðDßüf°õíᾫ¬øB榠¾#²y ®šmÛÊÜAË\Åxϧ¥{åQ@8üt†Êo‰öK|PD4d#{í¼é;ä{מýƒÃ¿ß·ÿÀ“ÿÅW¶x×Sð¦•ñ²üa£éíáÅHÅÕ¡¸_7í,F+`í Î=}jOøL¾Ï®‡ÿ‚&ÿã5ÏR„§+©´{,Ò–—³•M÷k_Èðÿ°xwûöÿøøª>ÁáßïÛÿàIÿâ«Ü?á2øÿ>ºþ›ÿŒÑÿ —ÀÏùõÐÿðDßüf³ú¬¿çã:ÿ·¨Ð%?¹‘Ð|ÿ’C¡ÛÇþ”I^^ðKþI…ÿoúQ%zv8QEQEâþø‰á_ê>1°×5_²]Iâ[Ù•>Ï,™BT”R:©ü«Ú+ÅüãOøG5ÙÿÂ3â=S‰oeó´Ë>5ÉQ´¶á†ã8ô#Ö€:ø]¿?èaÿÉ+þ7Gü.߇Ÿô0ÿä•Çÿ£þŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þ΀8ÿŠ_¼â?‡¶“¤ë?h¾ŸÉòâû,É»lÈÇ–@žM{…xÅ/ÿmü8Õ´ïøD¼Waçy?é7úo• bdo™·gäW¸P^UñKX°Ð<à=OSŸÈ³ƒûCÌ“c6ÝÑ"ŽÔŽÕêµåÿ5_ìOˆþÔ~Á}äÿhÿ£XCæÌù…å\Œã9>ÀÖUèÆ½)R–ÒM}êÃNÎä_ð¸|ÿAßü”Ÿÿˆ£þ€ÿè;ÿ’“ÿñ«ÿ Oþ¤/ÿàŸÿ³£þŸýH^9ÿÁ?ÿg_3þ§àžzÿäM¾±#+þ€ÿè;ÿ’“ÿñ|-Ö,5ÿøóSÓ'óìçþÏòäØË»lN§†õµjÿÂÓÿ© Ç?ø'ÿìë?ᾫý·ñÇÚØ/¬<ïìïôkø|©“ºüË“Œã#ØŠô2܇ —ÕuiJMµmZîŸD»:®jÌõ (¢½³3çÿ†ÿ<% xLÓ5=[ȼƒÍó#û<­·t®Ã•R:Þº¿ø\>ÿ ïþJOÿÄW™ø'S¿·ðŒPø_ÄW±¯™‹‹==¤‰ÿxßuç¸5ÐlêŸô%x·ÿMþ5𸼎q*{)»¶ô’¶¯§ºyu3Œu9¸C ÚNÉß=ºgü.ÿÐwÿ%'ÿâ+›ñ_|)ã;ï i:uìwìÞ!³i ’Ú@­Y[;Ô>`1ïUÿ¶uOú¼[ÿ‚¦ÿÏ»¿¼ºñ„’ãÃÚæœƒÄdMdaF;þè$õï@kL¿% T**sV{¹&¿ô•ùŽŽm­5N¦Å=Ýöüm¶ð…,|Am®Ùhv–š…²ŠKPbP`rŠB†#$ÓÐcK\Ð4ŸiâÃY±ŠòØH²ˆäOÈî=Á ðH­*+íLñ=zûÂÿ¾3¤²Åi¤XMáี´*­)¸'•zá:ã°ö­ø\>ÿ ïþJOÿÄW3ñc]·ðÿÅË[»˜g•BHÂÀ¡›&w9äŽ8®sþn‘ÿ>§ýùOþ.¾[6ȧĺʌå¢Ö-[ÿI™çâsLV§³¥Cw½¿CÒápøþƒ¿ù)?ÿGü.ÿÐwÿ%'ÿâ+Í¿áféóáªß”ÿâèÿ…›¤φ©ÿ~Sÿ‹¯7ýU©ÿ@Õ~õÿÈÿÛ¸ÿúÿÉ¿à©ðbò GIñEõ¬žeµÏˆîæ‰ðFäeŒƒƒÈàŽµéUåŸî’÷ÂÚõÜjÊ“ë·*¸ÃÉûó^§_oJ”ãZÉ´då&¬ØWÏÿ þ$xK@ð™¦jz·‘y›æGöy[né]‡*¤t#½}_$øWÇZvá»K í/ä–-ùh£R§.Ç‚Xz×›œåòÇPT£ NÎöŽû5}žšö0Åbëa`§FŸ;½­{|Ïaÿ…Ãà?úÿä¤ÿüEð¸|ÿAßü”Ÿÿˆ¯6ÿ…›¤φ©ÿ~Sÿ‹£þn‘ÿ>§ýùOþ.¾kýU©ÿ@Õ~õÿÈÛ¸ÿúÿÉ¿àwŠþ$xK_п³4Í[ϼžî×Ëìò®í·±å”€÷¯p¯”õˆz]倉,µ"xd%âP0’«âôS]Æ‹ûCÍ«ø‚ÖÍ|#vðH’ŠÆSspÌå(»TlCœ¸o¥É²ù`h:R„¡w{K}’¾ËM;ø\]lTëSäwµ¯™î•å_µ‹ ÇþÔõ9ü‹8?´<É63mÝ(áA=HíT<_ñÊm ÞÅ­|)¬ÚO-À.šÕ™·YaQóˆØ9;òSœ3ȧýùOþ.øYºGüøjŸ÷å?øºø¿õV§ýUû×ÿ y_Û¸ÿúÿÉ¿à“ÿ ‡ÀôÿÉIÿøŠ·ðbò 
GIñEõ¬žeµÏˆîæ‰ðFäeŒƒƒÈàŽµåð³tùðÕ?ïÊñuéî’÷ÂÚõÜjÊ“ë·*¸ÃÉûó^ÖK“ËRR•)Âëí?ËÝG^0Äâ›j<‰yÞÿêuÏøïþIç‰ìuÿ¢šº çüwÿ$óÄ¿ö ºÿÑM_Bvœ?„þ/øLðn‡ay®ùwVº}¼3'Ù';]cPÃ!0pAé[ð»~ÐÃÿ’Wün¼WHø‡¥ÙhÖ6’Yj,ð[Ç2D¥IU‘ótâ®ÂÍÒ?çÃTÿ¿)ÿÅ׫˜ßL$šþ¼ y¦22ia›ÿ·¿à½ÿ ·áçý ?ù%qÿÆë—ñ‡ÄO ø»Qðu†‡ªý®ê?ÙLÉöycÂÀœºÕ‡ç\Oü,Ý#þ|5OûòŸü]GŒ,|A⟠Z[ZÞDé®ÙÈZxÕWñŽóÍU:¸÷$ªa¥õo§ài‡Ìqu*Æø§Öû~ÔTQEvžÉäþ$ñNáޱßë—Ÿdµ“ÃB)äË›’@Âz)ü«cþoÃÏúòJãÿ×;ã?éþøÛ§©[ÞO ¾궈¬ÁË6HfSßÒ¦ÿ†ˆð¯ýfbE?9 OáŠåJú#™&Ý‘«ÿ Oþ¤/ÿàŸÿ³£þŸýH^9ÿÁ?ÿgYð³ü]ÿDëÿ+qñÂÏñwý¯ü­ÅÿÄUûŸÊþãOaWù_ÜjÂÓÿ© Ç?ø'ÿìë?ᾫý·ñÇÚØ/¬<ïìïôkø|©“ºüË“Œã#ØŠþ‹¿èån/þ"“áv©y¬øÿÇš…þýs/ö~û_³ÿ„gÄz^ÏÙKçjvDm‚ÃhmÇ-Îqè¥uð–üCÿ¢aÿ•ûð®_Æ犵=GÁÐëžþŵ_Ù:ÜiÅs¹òÀ&Ôœû{дQEó—Ç-?PÔ¾(Yæé×—ó.ŒŒÑZ@Ò°_:A¸…ÆH÷çÿðŠx«þ…/àº_ð¯uñ&£¬éŸc›Cж®›ÃAßí‰mµ>ÒI}Î0p@÷ö­øK~!ÿÑ0ÿÊý¿øW±‚Ïqx*^Æ‹Vô3•(ÉÝŸ9Â)â¯ú¼Aÿ‚éÂøEE, Õáž!š=fe’)«#âH<‚W¬×›üšâãNñd×–¿dº“Ä·5¿˜$òœˆË&áÃ`äduÅzE|ôæç''»6Z|-f—Ñ YÝH¼á’"Aç־鯔<#áOj^³»Ò|7öË7ùsýº÷áØ•ŽF#ŸJõr\bÂbG>[«^×ê¨Ò¡V\µåÊ¿SŒòîÿè{ÿ~M]ßýïïɯNÿ„âý ÿùS·ÿâ¨ÿ„âý ÿùS·ÿâ«éÿÖÐGþHÿÈéúŽ[ÿ?_ÜÿÈó­9.¯`^Îê5û\9gˆ€>uï_dÚèZ=Ž£>£g¥XÛßO»Î¹†ÝI7͹€ÉÉœõ5ó†¡áOi°Áw«xoìv1ÝÛù“ýº6fdåS“’@ãÖ¾Ÿ¯˜Î±‹ˆUù¬­{[«9«R¡J\´%Ì¿Rž¥¤éºÍºÛêš}¥ô áÖ;¨VU ‚23‚F}Íxí n–Íàûk;m±D—IG€ª¨èì; ÷Êñ_ްÞOâÅakö«–ûnÈ|À›¾H³Éàq“øW~ÏNwµš}úö3„#9(Ëfx?—wÿ@ûßûòhòîÿè{ÿ~Mw_Ù.ÿ¡gÿ'âÿ?²<]ÿBÏþOÅþ5÷_ÛOþäüKû+üòÿÀ_ù/—wÿ@ûßûòkèoÙß#Àš eeoí‰r¬0Gî¢à×™ÿdx»þ…ŸüŸ‹ükÔþ¤ÑøO\Kˆ¼©×\¸G¸6ÆòâÈÈàà÷¯ =Çýfœ´æ³þV¿C›ƒ¡‡IÒ“wîšüÑêµÏøïþIç‰ìuÿ¢šº çüwÿ$óÄ¿ö ºÿÑM_4rÀ—&Þ2¶7l»Fa$:Š“Ë»ÿ }ïýù5×éz_Š$Ò,žßÃÞl äûlK½vŒ‘‘Ú­ÿdx»þ…ŸüŸ‹ükï¨ç.4â½¶É}‡þG¥¯$›œ¿ðþG åÝÿÐ>÷þüšÕð¢Î¾;ðÁ’Öâ%þصù¤Œ¨ÿZ¼WKý‘âïúò~/ñª—Þ(Óõ}æO¬s&­nm‘¯#a4Á²ˆpxÉO¹³ ÕÕÂΟµ½ÖÜ~6&®]„§8NM¯'þGÖtWˆèÿþ*Þë‘A/ÃÆ{g/û¶·šÔãi#3Hv/n£žƒŠÖñ'Žþ'iútrÙü7òd2…-ö¡‘‚qåÆÌNN¤WÅyÁ|~7Ä«2Êߨñü±®ãþº^k̼»¿úÞÿß“]ÿ‰¯Söœº½9[ülwPÀa«Cž¬š~I¿Ðá|»¿úÞÿß“G—wÿ@ûßûòkºþÈñwý ?ù?øÑý‘âïúò~/ñ¯WûiÿÏïü‘ÿ‘·öV ùåÿ€¿ò=£à—ü’ þÞ?ô¢Jô óÿ‚_òHt/ûxÿÒ‰+Ð+óÓË (¢€ (®úúmÇúýÜ7]ÜÅâÓX2ùó#Bò¹Î@ne;¸Û´¼¯ÿ„ÇÚ~³­Í¤Üxi­5 N{äo9‘DÕÇ@=yÏ5{]×m¼E „#MÑ Ñ, ªé÷sÚNÞLÊRO>tRn_˜ä¯O^ÃÀ7k}à2å4ÛM=$G+œ-oaæFŒªB?úÀÎg=MFN.ñ*”âìÎþ/ŠóÛÁÿ÷ÕÏøW'¿â üJ|<šÿƒ¨.×ÉxïyªH)½.r09äà äWÑUóþ¢k¶~ðDž¾ŽóIñ,W÷:œ0ƒgq –c/›Ç͵€ó„†ùŸX«üÆßZ­üÆ–¹ðëâF¿£Ï¦]\øQ Ÿnæ‰î 
¬c*GP;W·QEg9ÊnòfS©)»ÉÜ+€ø‰áx‡Xðö§áù´ÄŸKûNåÔ@­æª(ÆÅ$ð¸íÖ»ú)&Óº%6Ñä?ð‰|PÿžÞÿ¾®³õ?ÇžÓžÿWÕ<ej¹æ–än µF2Í€p£$ã^Ý\ß´˜uŸëVï§Ç}:ÙNö±´"VyL Á;òpçž+_¬Uþc­VþcÎtâV³£Xê–ïá5‚öÞ;ˆÖCr+¨`Áõ5Õü;ð‡ˆ<=¬x‡SñÚcϪ}›jéí!Uò•Ôçz‚8+Ü÷é]‚àš×À¾·¸ŠHg‹L¶I#‘J²0‰AAŒVåLêÎjÒdNµI«IÜ(¢ŠÌÈò xo⟄|9i¡ØIàÙ-mwìyÚè¹Üìç$:±í[ñwÿêFÿɺô (Åôˆ¾2×õÇÑt½oáýÍúïÄj·À>ß½±ˆ ýÏÊN@$p3[†þ#øQп·$ð¢X麭¾ ßak#ydä àƒÁÏí[üŸÝÞÉ»n7}Ä8Æåëë^ý^WñkKÓu}SGXðωõKHmîJM¢F²‘ö( ½r1¸@È ×ë˜ßëU¿˜ÅÑ4ïøGƒVÒo¼qc>ï.\]¦í¬TðÊäÈ®ëᯅµo hÚœ:Ô¶R]ßjrßfÎcÕ>`U>¼cš±ðÊ ^×ᾇo®E$7ñ[ì1È¡Y# D@Јö }yÍu•©9üLÎu§SI;…eø—M›Y𮯥۴k=í”Öñ´„… èT€N2} jQPfx͇¾&iÚu­Œ3øI¢¶‰!BïrX… ázñSIáo‰±FÒIqàÔD™™î@u$⽂²|S§Ï«xGZÓmB›‹» àˆ1À.ñ²ŒžÜšÙW¨º›¬Me§1â~Ô|Sâû‰íô-oÁ—SÀ䌭ìm´œd@HÎ#8ÈÏQ]ü >Ô5mZãÃKi§êp_8´yÄŒ#l7.:éÎ9¨~è÷_‹odÓt sFÑ&Ò¡qjÑ…-z­€cb2í2€Xä(Ø´RuªIY±KVJÎZQYžiãOø«SñÜ>"ðìú2ªé‹bé¨< çÍg$_uïëÅfÂ%ñCþ{x?þú¹ÿ õê+HÕœU¢Ía^¤¢ìž|I«ø›Â:ŒvæµàûK©"*y7Òe ¤duSùVƉ§xÿÄz<¶“}àû‹÷yrâí7mb§†PG ŽEwߥñÐìl<8ÓÃq©jÙ\]ÁJö>íòŒ·1Æ3ÁuA ­¼VöñG  HãBª(ÀqН¬Uþbþµ[ùŽoá߆ï<#àM7C¿’ .­|Ýï‡t®ã€z0í]EV'8QEW­ÿÂÇþØŸûþOìÏ—ÉûÚ<ïº7nÙòýíØÇlWaE|ùªx_ã…ÇŒZé59áe´ÔBYF8é ”^„%°s»95lþ*|FÒîu-6ê}Q¸²½–ÚIî``w!ÚBù{\‚A#<ýú6¾JÕb’Oø¨¥Ô±í»±µþ°óʚƽOgcÐË0oˆT’¾Œìÿáqxûþ|ü5ÿ~§ÿâèÿ…Åãïùóð×ýúŸÿ‹®ìÓÿÏýÇýóÿGÙ§ÿŸûûæ?þ"¸þº»ŸIþ¬KùÞ¿Ììï~6øîÆÑîe±ðáDÆBÅ>y8ÿžžõôM|e®Á*h× ×“Hß•‚`üÃÑA¯³k®…_kcçólÁVTÚµÕ÷¿WþA^oñSǚ߃nô+mßO•õ´o7¨ìËF6°ÇÞ>½«Ò+Ä>>«>³àõY2~Ûó.2>X½A¬åË.Ç—¶­ ÌÒûÙ™ÿ ‹Çßóçá¯ûõ?ÿGü./ÏŸ†¿ïÔÿü]p¿fŸþî?ï˜ÿøŠ>Í?üÿÜß1ÿñçýuw>·ýX—ò?½™ÝÂâñ÷üùøkþýOÿÅ×uð¯ÇšßŒ®õÛmjßO‰ôï³ì6HêÌNw1ÏÝëÂþÍ?üÿÜß1ÿñé?eŽËQñ¬÷W!b…,ÞYæePªRKwö­èb=¬¹ny™¶NðTUG]Ûtú?ò=Ί§¦êÚn³n×^¡i}¹F’Öe•C`¤ŒàƒqQÚëº=ö£>gªØÜ_A»Î¶†áHö­¹AÈÁ ô5Ö|ñàÖ_|w}h—1Xøp#碟<ÏOjŸþ¿çÏÃ_÷êþ.¼ÓB‚WÑ­Ùo&Œß*„Àùª“Z?fŸþî?ï˜ÿøŠóçŒQ“ö>» ò­F9>$žëª;¯ø\^>ÿŸ? 
ß©ÿøº³¥üañœÞ"Ѭo¬ôm}¨Ai!‚)·€î#/Œã>µçŸfŸþî?ï˜ÿøŠŸJŠHücáR÷RÊ?¶í×úÁÏ *©âùæ£s~ÿ¿SÿñtÂâñ÷üùøkþýOÿÅ× öiÿçþãþùÿˆ£ìÓÿÏýÇýóÿXýuw=õb_ÈþõþgП |aªxÓ÷×Ú´6q\ÛjZbÑYP…D9ù˜œåé]ÅyGÀWÁÚÈ,XnpXã'÷qsÅz½z1wIŸR<“q켟ÂÞ$ø§âïZk–ø6;[­ûuº6»!ÈŽª{׬W“ü ѯîþèÓÃâmVÊ6óñÚ”LO à¼,Üõäž¾œS Øÿ‹¿ÿR7þMÑÿþ¤oü›®ƒþíSþ‡=sþüÙò=ðjŸô9ëŸ÷æËÿ‘è‘Õõ¿Š°–¦ž{$žå[ardÄ’,.âVMzuy¿Ä-þÓÁÓO7‰µ[ØÖîÏ0O¨GÍÌC’«qׂ:zq^‘@pþ7ñ'‰4ÏøoCðÔzS]jÿjËêK!Eò‘_ªŒ‚Ýn•ÜW›øîÞ[¿Š_`†ö{)ûKÀºb<V^zr_^hÇü]ÿú‘¿ònø»ÿõ#äÝtðjŸô9ëŸ÷æËÿ‘èÿ„{Tÿ¡Ï\ÿ¿6_ü@ÿü]ÿú‘¿òn´>x“Xñ¬nGb—Ún«6žßaW·–$o$žIçŽ1ÅhÂ=ªÐç®ß›/þG®oá$m ¿Œ"y¤ÓŪÒÈ3æ;@=xz@‰Y~%ԦѼ+«ê–ëOee5Äk %K"àƒŒQZ•ÏøïþIç‰ìuÿ¢š€/u,£ûnÐmp€¬𢮞/žj70Æðü°øyÕåµ—t}kEWqò§“üHø‘â? xÆßEÑm´©"“O[¶kØä-“#¡«>QÛÖ¹_ø\^>ÿŸ? ß©ÿøºãJ<Ÿ-LñìD;)?ëäãk‰û4ÿóÿqÿ|ÇÿÄW|O³Ÿ-ϤË2GŒÃªª7ÕõGuÿ ‹Çßóçá¯ûõ?ÿGü./ÏŸ†¿ïÔÿü]p¿fŸþî?ï˜ÿøŠ>Í?üÿÜß1ÿñ×WsÐÿV%üï_æ})ðïÄ—ž.ð&›®_ÇwW^nôHA¶WA€I=w®¢¼ÿà—ü’ þÞ?ô¢Jô ô‹ (¢€ (¢€ ù¦Zx‡Äž-»¸Ô5wM~ò –ÓRç$<óü«éjðoø&k/¼“\×, xŽö/*ÂìD‡ â6ž~lgÐ òsªþà çÍ˪Ö×:°u=^k~†ü+ 7þƒ:çþ/ÿGü+ 7þƒ:çþ/ÿ]oü*›_ú|YÿƒÿÄQÿ ¦×þ†ßàÄññßÚËþ‚þzß]_ÊþóÍ+ðNŸ4ÓÿnÌ0WHÛ‚Aô¯b¯(ø©¤®¹ãÏiÏwwh³hf{9<¹W#|­ƒŒãØšôq“äÃTíh·~ÚôeËR/³Gÿ ÃMÿ Î¹ÿKÿÄQÿ ÃMÿ Î¹ÿKÿÄW[ÿ ¦×þ†ßàÄñ©µÿ¡·ÅŸø1üE|ö²ÿ ‡ÿ€žç×Wò¿¼ä¿áXi¿ô×?ð)øŠÆÑ>Eâßø‹O‡]žÖ=/ìØ’xÃËæ¡nHeÆ6þ¾Õèßðªmèmñgþ GÿRü+ÒWCñçŽôä»»»X³ñ=äžd­˜¾fÀÎ3ì{y9b12‡µr÷[µ­ÕxÜB©M+5¯{”¼ð[RÑ´i­õOë63µÃ:Ç¡j-v¨Ë@wäO ZÇOÙÊkÝfîóTñ,žD—®ê«šy¡-œ¼­´ H''k óÏJ÷Ê+ë,ùcÁ¾±Ö|)e¨M©ê°É/™˜àU—€TúV÷ü+ 7þƒ:çþ/ÿR|9ø}¹à=7Qx†Ñ¦ósèŽ%Ä®¿*í8Î2}É®§þM¯ý ¾,ÿÁˆÿâ+àq™š†&¤=»V“VåÛSÜ£‹Q§g¢]NKþ†›ÿAsÿ—ÿˆªSø:ÓÃÞ$ð•ݾ¡¨Ü;ëöq¹˜:€_9(çç]×ü*›_ú|YÿƒÿÄVˆ<†õ^G®k—åüGe•v%A–'pG?.3èMk—fJ®*öÍÝíÊN#§JQ³ûÏy¢Š+îOðωº !øÃoiqsun‰ ,¡í¤Ä‹‡$ƒÇ?ʲáXi¿ô×?ð)øŠéügáøüIñ¶95FÀ'‡_6Â)Î.Xm'›8õ¦ÿ…SkÿCo‹?ðb?øŠøìë¨bœ=«Ž‹K\õ°x•N—-Ÿßc’ÿ…a¦ÿÐg\ÿÀ¥ÿâ(ÿ…a¦ÿÐg\ÿÀ¥ÿâ+­ÿ…SkÿCo‹?ðb?øŠ?áTÚÿÐÛâÏüþ"¼ŸíeÿAÿ:¾º¿•ýå¿–«cá¿Ú#»¤ýÌJò³HÆIîx¯Q¯$øU©èÞÑüEgªkvÖÁ|GyRê7h6ň%ˆÜy‘Üû×¥èšÞâ= [I¸ûEŒû¼¹v2nÚÅO ‚9úÍN/É ÝäÙ¡_4øC»¼ð^Ÿ<^)ñ%Š?™‹{=@ÅbF*ãŒã'Üš÷k_øZóQŸN‹^±[ènÚÉ­¦”E!˜¥U_¹8r èMx×߇ÐkžÓuñˆmo70YÞˆâ\Jëò®ÓŒã'ÜšòsÜSÃaã5>Oz×Jý†øG7Ï«_ðŒßÿÐïãü·øQÿÍÿýþ0ÿÁ«…kÿ©µÿ¡·ÅŸø1üEðªmèmñgþ Gÿ_+ý´ÿè%ÿà,ô9ðÿóèåuíîÏLIåñO‰/‘.í³oy¨b|΃æ\sŒä{_CW…x£áô‰ý¢ž"ñ 
ÛCwkˆ//D‘6g~eÚ3Œä{^ë_U‘bž')¹óûÍ]«tZ~-ÁÍrFÊÁ^GñzÚKÏx¯¯,]þß‹‹9|©SÆ~Víœ`û^¹^QñSI]sÇžÓžîîÑfþÐÌöryr®"Fù[Ʊ5èã'ìðõ'{Z-ß¶›˜Ó·:ºº¹‡ÿÍÿýþ0ÿÁ«…ðŒßÿÐïãü·øV¿ü*›_ú|YÿƒÿÄQÿ ¦×þ†ßàÄñðŸÛOþ‚_þÏ[Ÿÿ>ŒøFoÿèwñ‡þ [ü+¥ø#A ø’'žiÙÞ_&ßUÙ{£Sµo 3€; Øÿ…YÿSïŽðqÿØTòG±ºÄÖZ)¿½™?ð ¼=ÿAÿÿàdün¹ÿü1Ò¼«ø?Q±Ôõ{©eñ”/gW@ È?(ïÜ×mÿ ³þ§ßÿàãÿ°®_Æ ÿ„sQðuçü$Þ#Õ7ø–Ê/'S¿óã\–;‚ín1ŸB}hPŠÙ XŠÒV”Û^¬öŠ(¢¨ÄñÿxBËÆŸâÓ¯®ï­b‹Ã‚pöRrEË. *xùn­‚ð÷ýüKÿ‘ÿñºo‰4øHþ:Çgý¯ªé{<4%ó´ËŸ"FÅÉK`åyÎ=@ô­øUŸõ>øçÿý…KŒ^èÖêÁZ2iz™?ð ¼=ÿAÿÿàdünøP^ÿ ÿ‰ð2?þ7Zßð«?ê}ñÏþ?û ?áVÔûãŸüörG±_Z¯üïïaðKþI…ÿoúQ%zyÿÁ/ù$:ý¼éD•èFEPEP^/àÿ‡~ñv£ãýsJû]Ô~%½…_íÇ„HF«ν¢¼A×þéZÏ‹àñ„:kê â;׌Ýi¦á¼­À 0FÀÜŒúúÐyÿ Káçý ßù;qÿÇ(ÿ…%ðóþ…ïü¸ÿã•ÏÿÂeð3þ}t?ü7ÿ£þ/Ÿóë¡ÿà‰¿øÍgüRø[àß|8Õµm'Fû=ôO—/Ú¦}»¦E<3x$r+Ü+爞%øS¨xRµðÕ¾”š»ù_g0i- ŒJ…°æ1”7~zWÐôW—üHÑ4ïüGð“«[ý¢Æí2-ì›¶ÂŒ9RäÁ¯P¯'ø­{¡iþ;ð-׉RÒûCíx ÈsÊsóíÇZØÿ…%ðóþ…ïü¸ÿã”Â’øyÿB÷þNÜñÊçÿá2øÿ>ºþ›ÿŒÑÿ —ÀÏùõÐÿðDßüf€:øR_?è^ÿÉÛþ9Yÿ ôM;ßü}¤é6ÿg±ƒû;Ë‹{>ÝлX’y$òk?þ/Ÿóë¡ÿà‰¿øÍXøS{¡j;ñÝ׆’Òû?ìâ (1†Â1óíÏZõŠ(¢€>TðƒôW¶W·¶>mÄ›÷¿œëœ;À`:[Ÿð¯ü/ÿ@Ïü˜—ÿН·’Õ`Q ]üç)žõ'eèŸ÷ÇÿZ¶\'*ëÛhÆ<Úòßkëo‹¦ÇÍ×Ë1s«)G$›zk¦»|G¯¿ð¿ý?òb_þ*ªÉá}Dñ/„îtë?&f×ìã-æ»eKçb{^WçYz'ýñÿÖ­ŸInÞ>ðÀˆ.ïí{^‹Ž<Õ¢\-,*öï0ŒùuåO/‰•…ËqTëFs¯)%ÑÞÏÿ&>Ô¢Š+è ø¡£Øk-­µ<èW@Yïeà ‡ʑؚÅÿ…áúäÄ¿üUUý¡%øbeÆßìˆúŒóçK^WçYz'ýñÿÖ­aÒǯn±ª—NVûuø–ç…ÀbkÖç§ZQ]•íù£×¿á_ø_þŸù1/ÿGü+ÿ ÿÐ3ÿ&%ÿâ«È|ë/Dÿ¾?úÔyÖ^‰ÿ|õª¿ÔÉÐÎ?ÿlqdãè&ÿ$zǃ¾ é4Ñõë•Ô.ôû»]^k;} IÆ›ܧ Ç FwŽÇžþ‹¢|ðm޶­eý©|›¼ËÏ6h<̱#äY00uÆ{Õ/ÙÔ©ð¤Wî^\}<¨«×kgî^öÒýüϧ¦œ`“w×UñW‚¬¯bómäûvôÜW8HÈäz^Ç^ ûIYü(dÆÏô¼äg´5*·~Ç›—›Nn×Òÿ-κð”éJ1vm={i¹—ÿ ÿÂÿô ÿɉøª?á_ø_þŸù1/ÿ^CçYz'ýñÿ֣βôOûãÿ­[©’ÿ¡œ~ÿþØù¯ìœoýÏñÿä^ÿ…áúäÄ¿üUw¿íâ´ð׈- ]Ã¯ÜÆ‹’p¡#dûWÌžu—¢ßýjõÏ…_|7àOÞZêFêk¹µG•-­aÜÂ3ùb«Œ©ÎzqŽj'‘¼·ßxµ[›K'{yîÏO-Á×ÃÎN­YNýïú¶}#\ÿŽÿäžx—þÁW_ú)«—±øëà»8ç›TžÊFÎ`žÒBé‚G%—ž¼×׊æõ¯Žžñ…uí-R³žãL¸Žº·]²HÈUPlf ’z$Vg®qÚ/ü9w¡i×3éÛæšÚ9¼ùX¨$à7­^ÿ…áúäÄ¿üUxürÚ0MÁF~Nÿ•;βôOûãÿ­[®•OûJ*úÚûy|GÌTÊñ’›k5÷ÿòG¯¿ð¿ý?òb_þ*ªÉá}Dñ/„îtë?&f×ìã-æ»eKçb{^WçYz'ýñÿÖ­ŸInÞ>ðÀˆ.ïí{^‹Ž<Õ¥.–{w˜F|ºò§¿—ÄÍp¹n*hÎuå$º;ÙÿäÇÚ”QEb}á_4{ oãµ¶£ è ]ì¸apã9R;X¿ð¯ü/ÿ@Ïü˜—ÿŠª¿´#D¿ìL¸Ûý‘Qž|ékÊüë/Dÿ¾?úÕ¬8rXõíÖ5RéÊßn¿Üð±ø MzÜôëJ+²½¿4z÷ü+ÿ ÿÐ3ÿ&%ÿâ¨ÿ…áúäÄ¿üUyeèŸ÷ÇÿZ:ËÑ?ïþµWú™/úÇïÿíŽ/ìœoýÏñÿä«~ 
É!пíãÿJ$¯@¯?ø%ÿ$‡Bÿ·ý(’½°>¨(¢Š(¢Š+ä_W±°ñ—ŠbºŸËs­Ý°ñæAí_[×ÎV>/дx¶×S¾ò'^H«å;eKœª‘ÔîËñÃÖö‘jþ{~hìÀUt«s&—¯ü:8ßøItùûÿÈoþÂK¤ÏßþCð¯Cÿ…—áú ÿä´¿üMð²ü#ÿAü–—ÿ‰¯{ûj·óCîü‘îhOùã÷öÇ–kšæy£Ïåm¸]Œ3†¸ö¯³ëæ/xãÚǃïì,5:æ_/by.q"“ÉP:_Nׇ™b¥‰ª§&ž–ÓÕù³ÆÌk:ÕT›ONŸ?6â®a³Ö<<ï²%ûn[ã+íõ¯o¯ øÅ©Ùèþ1ðEýüÞM´_oÞûKc1ÆÔŠäÃÍÓ«®?Äå¡.Z±’èÑãÿð’éó÷ÿßü(ÿ„—HÿŸ¿ü†ÿá^‡ÿ /Â?ôÿÉiøš?áeøGþ‚ÿù-/ÿ_Oýµ[ù¡÷?þHú?í ÿ<~ïþØóÏøItùûÿÈoþêna¼ÖçÿÉÏö„ÿž?wÿlyâëšuåͤ\o•®àÂìaœJ§¸ö¯¯ëç-oÇÖ,#°°Ô|ë™ní¶'‘"ç¡<• 5ômxy–*Xšªriém=_›|ùããUõµ‡ÅK9n¤òÐ舠í'Ÿ>OO¥pÿð’éó÷ÿßü+Ô~"ëšvñ–ÞëS¸ò ¬jÛ²ÆáÎ0 ž€ÕøY~ÿ ¿þKKÿÄ×½—æU0ô}œ\mç¿æo‹•*<ªQ^¿ðèóÏøItùûÿÈoþÂK¤ÏßþCð¯Cÿ…—áú ÿä´¿üMð²ü#ÿAü–—ÿ‰®ïíªß͹ÿòGoö„ÿž?wÿlwŸ¿äè_öñÿ¥W WÍž øß¦ø3áæ—¢E£ÝßjÏ —t‹[ZG|«|ÄŸ™F ŽüñÏ£é_ü{¥ÛÜ_ê2i÷n™–ÕíåÄÝÆäBwÐŒ€rÉŸ.ze^ÂúßSÓ­¯ìäó-n¢I¡}¤nF©ÁädÖ¬P\úxŠUñ†©¥]Û}ŸO³²·¸[ÉKHò)ïÎÀÊ9GÏ3ÐV]ç‡4Fö{»ëîž{t¶š9Éx¤ºˆ„«C‘“ƒ@þ)ñiÓ쬆‰5¥ÕÅÝêZ—Xä»)G‘ŸÊ‡.çlL‚99$k[Ã:œÚdžìoî^Íçš<ÈÖNí`H8ÞªËÈåXeNTäŒÕWð?†¼¥ŽÛH‚Ãl¢`úik7ÞÐð•còÈã¯ñóøºŽ7ö¶ñ[Ûø8`‰Gz¢*¢€à8Åv¿¾"Ëã¹õx'ѳ$Ó¼œµ ·ùÏe\cg¿ZjIìÈWœZõGwES3 +íÿhBîžﳃýª£88ïKÿ ãUÿ¡ÿ*ËÿÆêyãÜÙaë5u÷2ÿ†-õ[_‰ÐZÁãMGZ[u¸þÛk¹BÛ<“VÑs‡B2ûxP¸Êä¥zí|÷cñ2ÃL¼ŽòÃá6•iuvMÔºäpÂ,Œ‚Gã]›ñÊö÷\ÒôëŸ}•/ï"´i+ì.Ás´GÎ3œqÓ­ q{0–¬Uå—£=ŽŠ(ª1 +ÍüyñRãÁ¾'ƒE¶ð÷ö“Éf·fO¶ˆv‚ì˜ÁCŸ»×=úW7ÿ ãUÿ¡ÿ*ËÿÆê\¢·f°£Vjñ‹kÐî¾"Y5î—n±ø’óI–3,‘ÛYßGi%ûˆØ¬BVû¼à瑌ä ÓøC}w{àû•¿Õ.o®mµ‹wK™ÒZín"i‡`ÛÇ>â¸]Kãë6ëoª|6´¾\:Çu¨G*†Á¢#8$gÜÔ–?®tË8ìì>AikvC¥h¹$œ($“øÑÏå}Z¿ò?¹žéEr_wØm%¹ò·mß± mÎ3Œg™™¡Y^'¾¹Ó<'¬êxûU­ŒóC•Ü7ª^;òäÑ|}Ô¦‰%ÁGPʵ—í;þÆ«ÿB7þU—ÿÔóǹ²ÃVz¨?¹‡Á»Ý~ןŽâ-zïSš}2ël_­ä7aŽü’ …88có †½²¼Lø¶š'›ý“ðÆÆÃÎÇ™öKè¢ßŒã;bÆO_S[:oÇ+ÛÝsKÓ®|!öT¿¼ŠÐMý¤¯°»ÎÑ8ÎqÇN´)ÅìÂXz±W”Z^Œö:(¢¨Ä(¯7ñçÅKøž ÛÃßÚO%šÝ™>Ú!Ú ²c~ï\÷é\ßü/Wþ„oü«/ÿ©rŠÝšÂY«Æ-¯C¤ø«moeg·.»â»{‡Ùcg¦è—†!u;(0¾cÎ[Ñ@œÚxsI›Bðýž›q©ÝêsÀ„Iyv夕‰$’I'8'“Œ×Ý|j¹¾ò>Ùðò "Už;Rü¹îºæ>dàŽEXÿ…ñªÿÐÿ•eÿãtsǹ_V¯üîgªk>ðÿˆw_E±½‘¢0ùÓ@¦ECžñ¹z’0F Èæ´,,môÍ:ÚÂÎ?.ÖÖ$†ÜNÔPŒžNkÁ>'ÿ„ÇÂ:ÿØþÇö¯3÷o™·lŒŸ{9Ûžë ª0 (¢€ (¢€ ùZº½‡Æ~)[{´!Ö®É8&˜xÁ¯®ëå©ü;®êþ*ñ]Æ™&œ°.»xŒ.YÃnßž6‚1‚?ZåÆU*\Òizž–Sìþ²½«iYü6¿â™Ïý¿Tÿ ?þL¯øQöýSþ€ÿù2¿á]ü!>.ÿžúý÷/ÿGü!>.ÿžúý÷/ÿ^GöæÞÏ©¶ùê}ÑÿäN;Y»¿—I&Ó|˜ÎÜÉç«cæ…z…Ä/‹1üC´²Ôô9\<©"KX§1Æwys¸b@ >Cz®'ÅñáË»»ùt¦¶fñH_—P1‘ޤW×5êàkƵ7(´õè|Öq콺öNM[íZû¾ÉhxŸÄ|IÓôk3oáI4 î/RçŽò 
æ™™[Â’FsþÎ;×x»Ä~:Öeðä¾)оÏ%·Ú"µ/[IrÛ#ó•û‡!UrH0>±¯øåcy¨øÁ–– ÜÉöí†rBp‘“œsÐè­%r“è™Á…åöðæ½®¶ß~žg‘}¿Tÿ ?þL¯øQöýSþ€ÿù2¿á]ü!>.ÿžúý÷/ÿGü!>.ÿžúý÷/ÿ^öæÞÏ´¶ùê}ÑÿäNsíú§ýÿòe½Sö~’Yuo¼Ðù2±f=Á±òËÜWÿO‹¿ç¾‡ÿ}ËÿÄ×uð6ÆóNñŒí/Ú¹ì;Ì”å$#ç¡ÙÅÓ­QÆ-=:_Èòs…‡ö ÙJmßí$–ϲZžÑEWª|Ññfw“C¦ùÑØ“ÏUÏÌ{½öýSþ€ÿù2¿áW´»°—J[i7ì´øv8ê kÿÂâïùï¡ÿßrÿñ5áVÇÒIEÊ:7Üû<*ÁûsN¥ì¶Q¶Ý=ÝŽsíú§ýÿòe­h·W³xÏÂËqaötÕ¡çÉó³ÿO‹¿ç¾‡ÿ}ËÿÄÓ ðx«Â—œšs@Úíš(¶g-»~yÜÆý*°øêU*Æ*Q×µÈÇ,'Õ§É:Ûª¾v‰õ-Q^ÙñçÎߦ¸‡â•›[Û}¡ÎŠ€§˜Ï“œšàþߪÐÿ&Wü+Ò~-izޝñnÖßL{Ut$v7%‚íóÜq´œ‘ú×3ÿO‹¿ç¾‡ÿ}ËÿÄבŒÆS¥W–RŠõ¹õJÃ}Y{YM;¿…+~)œçÛõOúÿäÊÿ…oÕ?èÿ“+þÑÿÂâïùï¡ÿßrÿñ4Ââïùï¡ÿßrÿñ5Ëý£Gù£÷³Ó¶ùê}ÑÿäOJýŸ™ßÁšÃ:ls­LY3§Ë‹Œ÷¯X¯+ø ÖÞ×mî â×nCvî 8Ï8Íz¥{ðwŠgÄU·´•»°¯›¼ñnÿà Óôx|+öèíüÌ\h,[÷HÍ÷Jcv:ö¯¤käo ø[Äz‡-.ì%Ò–ÚMûí ~Î:ƒ\ØÌJÃÁM´µ¶§nYC Z³Ž'›–ßg{ÝwOCÓá}jŸô#ÿåYøÝð¾µOúÿò¬¿ün¸ŸøB|]ÿ=ô?ûî_þ&øB|]ÿ=ô?ûî_þ&¼ïíx<ÜþÍÊ;Ôü?Èè|CñnÿÄúRèóxWì1Ü]ÛfãûAeÙ¶xÛî„ÎÜuï_@×Ê×~ñœ-îïåÒšÚ;»}â¿2 ÈÇR+êšôpx•ˆƒšiëm3¡†£YG ÍËoµ½îû% WŒün°ÔuOx.ÏI³ûeôŸnòàóV=ØHÉù˜€0<úW³W”|V»Õ,'𯈼%}­\i[Iâ 8TY<…÷—ÈÎåSúUS…e$åoŸù˜ã1Yt¨J4ªÍÊÚ&£oÂ(úJŠ(®ãåϾ7MqÅ+6·¶ûCO0&Ÿ'95Áý¿Tÿ ?þL¯øW¤üZÒõ_âÝ­¾˜ö«:èHìnKÛç¸ãh'9#õ®gþŸÏ}þû—ÿ‰¯#Œ§J¯,¥ësê2•†ú²ö²šw VüS9Ï·êŸôÿÉ•ÿ >ߪÐÿ&Wü+£ÿ„'ÅßóßCÿ¾åÿâhÿ„'ÅßóßCÿ¾åÿâk—ûFóGïg§lóÔû£ÿÈžÅðKþI…ÿoúQ%zyÿÁ/ù$:ý¼éD•èô'ÂQ@Q@|û£i¾/¼×<_&€Ú´"¼Wûy—~ýÃ8Ø1Œ÷Îkè*ñ? 
øoĺƧãÅÿØÖËâ[ÔkìØî7>T—ÜÄÁÞõçæxzØŒ;§G—šëâØÖGN\ÉØöįùéá?Îçü(þÁø•ÿ=<'ùÜÿ…t_ð‚øóþŠ_þP ÿâ¨ÿ„ÇŸôRÿò…ÿ_7ý‰™v¥÷?ò:þ½?æg›ü@Ò|qmàFmaü7ð4:9´íöÿ(Þnò¿Õ¡;¶óÓ8Ç|W­W|b7ëã2ûì7Ÿéþ]Ç”²ìýÜyù[ƒ‘‘ø×V2ÿV©k|/}¶ëä`ëFŠö²vQ×îÔ«ýƒñ+þzxOó¹ÿ ?°~%ÏO þw?áY¾w?èxÿÊLyÞ<ÿ¡ãÿ)0WÀòUþz_ø ÿùõ³ÿ?ŸÜÿÈÒþÁø•ÿ=<'ùÜÿ…_øK§mãìC~¿`óMžï+ý[‘·w=1œ÷ÍsÞw?èxÿÊL¹ðtß·Œ|nu;ï·^ y—RÅ¿÷rcå^á^ÞC¬L¹¥î¿…I=×t´.æý•*ŽMkª~W™ëôQE}a©ó‡Ãý'Ç>Ó¦ÑßÃÂÁ¼Ï(^¼ßõŒí£sŒvÅtߨ?¿ç§„ÿ;Ÿð®3Àòxµ|`4Ï}†Ï÷ž]¿ö|RìýãgænNNOã]ãÏú?ò“|2>³RÒ§ñ=Ô¯¿_wsÄø:/ÙJ«N:lúiØÒþÁø•ÿ=<'ùÜÿ…sž+Óüya?‡&½–Oí»af–Æošã,P>ì|œàç¥hùÞ<ÿ¡ãÿ)0Vmôž%oxHk>#þÒ¶ÿ„‚Ïl?aޝ¼á·/'ŒŒ{Ö¹t*,T/*{ôR¿ÊñH#ĘLKö0ªÛ––³ÿ#f×Nøêž&Ó®n®m.¢ƒúÙãŽ×çR§ÍH¶³ã‚888ÇzØñ]§Æ›ÝÊ´—ÃñÈ%F?ØòÉìÎ7LB…Î3‚ Æ9ƒë”WÜ›Ÿ7G§|K—âo“ªÝé3ëŸØûÕî†"Þv0<•6üõ3í]Göįùéá?Îçü*/ˆ­«/Æ[s£j¿Ù·?ð®é¾Î“nO´>Wkp9ÁϵRó¼yÿCÇþR`¯ŽÎ£7Š|²‚Ñ|JMþ £çØ\ö5j8¾Öäi`üJÿžžüîÂì‰_óÓÂÏøVoãÏú?ò“w?èxÿÊLäòUþz_ø ÿùÖÌüþsÿ#®ø'ÌZ‰£½òMÚxŠéfòs³xX÷mÏ8Îqžq^›^að<Î|?â3u?Ú.Oˆ.|Ù¶ódym£““^Ÿ_¡Ñþ}ºššç]B¾pø¤øâçÁtÚ;øxX7™å Ã7›þ±Ý´c®qŽØ¯£ëæ?ÉâÕðu€Ó1õñ‚™}öÏôÿ.ãÊYv~î<ü­ÁÈÈükÑÆ_êÕ-o…ï¶Ý|Šu£E{Y;(ë÷jUþÁø•ÿ=<'ùÜÿ…Ø?¿ç§„ÿ;Ÿð¬ß;ÇŸô<å& <ïÐñÿ”˜+ày*ÿ=/üü‰‡úÙÿŸÏîäi`üJÿžžüî·> Çs‡âhï|“vž"ºY¼œìÞ=ÛsÎ3œgœW#çxóþ‡ü¤Á]gÀó9ðÿˆÍÔÿh¹> ¹ófØÌ}‘å¶ŽNN{ùfªO™ÁéöS_}Ò.žs‡Ì%¹[ºª=>¹ÿÿÉ<ñ/ý‚®¿ôSWA\ÿŽÿäžx—þÁW_ú)«ê O>ð­÷Äôð†ˆºzøCìBÂoösæy~Zíߎ7cÇ­oí‹_ÝðWåuþ5çÚ ¾4Ó¯Œ~Ïl-"ò¡þÌ…ü´Ø0»'&´|ïÐñÿ”˜+ã*æ9Š›Jµ;_´¿ùΖ{•ÅòÊnëÉÿ‘Øh|Zþï‚¿+¯ñ®sÅW^8ŸTðzø˜xxYYþÍóüÏ3sc;øÛÞùÅRó¼yÿCÇþR`¬Û釫ÿeÝ(i¾Ì“îO´>Wkð9ÁÏ·½VþÒø‡ÿCçþR-ÿ¼|l²ÕWý¥.oGúµsê8){Ôå}¬ÿDkÂ#ñCþ{x?þú¹ÿâhÿ„Gâ‡üöðýõsÿÄÖOö—Ä?ú?ò‘oþi|Cÿ¡óÿ)ÿá\œÙ/e÷KüŒÿÖ¼7üÿt¿Èí> É!пíãÿJ$¯@¯?ø%ÿ$‡Bÿ·ý(’½¾Œî (¢€ (¢€ øÿ_Öõ­7ÆÞ)‡M×5K[Y»fŠÒíâRÞaˆSŒàŸa_`WÈšÞ‹£ãOÍ$÷²ëWkˆÜþ°ŸOzõ²hBx¤ªCYèiK S/gKs7þ¿ÐÛâüËþ4ÂWâ¯ú|AÿƒƧÿ„ZÓþ~ïïàÿ ?á´ÿŸ»ßûø?¾¿ê¸Oú_z:¿°ñÝ×Þeê¾ ñö›5½÷ˆµ‹»gÛ¾ ‹Ù$FÁeIÁÁýE}µ_ë:½–•5Â\];&Ü+¸ å€ô÷¯µ+å3Útሊ§O‘r­7êõ9«ajáeÉW}¼³âv‰ˆ¾!x JšîîÒ9ÿ´35œ‚9WlHß+qœ`ñКõ:ñ_ޝqˆ<Ö·—6s¶ížÖS‰òEœ0ädd} ¯"'VjšÞZ}äBŸµ’¦ºé÷›¿ð¤ô¿úü[ÿƒÿãt“Òÿèkñoþ Wÿבý¯Äô8ø›ÿrQö¿ÐãâoüÉ^Ÿú³ˆþHþoú½Sù#ø¹ÿ OKÿ¡¯Å¿ø1_þ7Pü1Ñ"ðïÄ/éPÝÝÝÇö~&¼I+n‰Ûæ`qœ:^Sö¿ÐãâoüÉ]ÿÀ§¸“Ä2k«Ë›ÉØ·Ou)’Gù%ÆXòp0>€W>''«‚‚©8¤žš^F5²¹a#í%¯¦‡µQEÂsi÷ÚŒ61Ç©C8Ž+†Už€UŸí=_þƒz§þ¿ø×ªü9ø3áÿøMÖïµb›Ÿ7z[Ï Û+ À(OEú×Sÿ ïá_ú 
øƒÿbÿãuô41¹4iF5pŠRI]é«êþg$°±”›²ûþÓÕÿè7ªà[ÿixfòúãÇ>KFòå±hBO;:ƒæŽp{ÿ{gü3¿…è/âü ‹ÿÖˆ>h¾ÖTY$÷5âŸð‹ZÏÝïýüá^Ýû?D ð^± –+µ2‚Ç“ˆâׇža°ÔiÁÑ ©»î­©œòº˜/zi+ö=b¹ÿÿÉ<ñ/ý‚®¿ôSWA\ÿŽÿäžx—þÁW_ú)«æÌÏ-µ Q-bHõF4TQ.X*ŒpÏ¥þÓÕÿè7ªà[ÿ]²ðÝ´öó5ÕØi"V!dñÅOÿµ§üýÞÿßÁþ÷”pN-àâÝ–·Fë‡kÏÞJ:úÚz¿ýõOü ñ­/ Þ_\xçÂés¨Þ\ Ö-IçgP|ÑÎñ§Â-iÿ?w¿÷ð…[Ñ4X4ïxZh縑›Z´\Hàõ€ú{W>a‚ÁC 9SÂ(´·ºÐRÈ«P^ÖIYz]ÑYpø—A¹Õ—·¦Ë¨d6‰t(eÎá°ä`äcŒ±©jÚnn·¦¡ic8E’êe‰K`œÄ àc_b|óñê{‹‰–Omu=³0^ 1t¼dvÿ óí=_þƒz§þ¿ø×¨|n°Qø¥g ’K®Š˜Ûý|ƒÓÞ¸?øE­?çî÷þþð¯¬Éð˜J¸e*¸eQÝêìk š®-{X%o3/ûOWÿ Þ©ÿoþ4iêÿôÕ?ð-ÿƵ?á´ÿŸ»ßûø?ÂøE­?çî÷þþð¯Sû?/ÿ (ýè¿õoü±ü¡¾ É!пíãÿJ$¯@¯?ø%ÿ$‡Bÿ·ý(’½¿?9Š( Š+ÛÄ\x¶ÿ@ó¬––Ü™ŒN¼ÆpT1]¼m\Ç9`9FÀÅ|»&‘ j)ñ\º¯ˆæÓgíâ¬)}!—~wma“É#>Õôv·{©iöÿh²¶Ód‚4w¹’þý­–%9ȉÁÉ$ãïÛDðî‰â cZð&•a©^îš{{‹H¥‘K1 »±c'kJU9s#JU=œ¹¬x—ü#^ÿ¡âëÿÐÿ…ðx;þ‡‹¯üCþï¿ð‚x?þ…MÿÐÿñ5ãú^¤—’èúܾðjø[XÖÆ™f§OU¼(ìÊ’67 ÁS‘Ü©CWO×eÛñgW×_òþ,â¼U¡øjÏÃwséþ*žúé6l·}B9CåÔ” œOá_Z×?ÿ'ƒÿèTÐÿð]ÿ]sÖªêË™œÕªûYsZÁ^1ñÂÚÎóÄž ƒP¾kWûvû„”DS 1àd€?özÏÔô-[ò¿µ´«ÿ'>_ÚíÒ]™Æq¸g§ ¨Œ¹d¥ØˆË–J]œ¿áðwý_ø6‡ü(ÿ„kÁßô<]àÚð¯}ÿ„Áÿô*hø.‡ÿ‰®oÇZá¿ Ýjš7Ãïê3ÀŽò,–°F°Æ¨Ìe9\¸GÈ'<ëúì»~,ìúëþ_ÅžMÿ׃¿èxºÿÁ´?á]·Àûk;?xÒ >ù¯­Sì;.Q)|¤„üÂHü+¯ðŸ„¼1©ø7C¿¼ðƇ%ÕÖŸo4Ïý›Üí–8 ’OJê4Í GÑ<ßì*ÆÃÎÇ™öKt‹~3Œí8Éëêk*؇V<¯ó2­ˆö±åµ (¢¹Îcå¯!á;/;Çú®•'ï3g²°$¼nˆzg¯¾sÞ·wi¿ôU5Ïü(V½£þOÿЩ¡ÿàºþ&øA<ÿB¦‡ÿ‚èøšÝUŠVäGLkÁ$œâû´ßú*šçþ+T® ¡ñ„¼êZÛÿoÙæÚëU*£ß :Ó?íZët[9OŽtÝÄ?<o¥ÄâÚÎÑ%¹°Èó° ±ùAžÄm¯LƒÁ~µ¸ŠâßÃZ43Äáã’;•‘È …È óš%V-YE!J´$¬ ‘¹EV9ât}[øÏ·ˆuÉ4{EðòÈ— v–åœ\8 ¹Á‚Çxöªßð€|0ÿ¢—uÿƒÛoþ&½‡RðÖƒ¬Ü-Æ©¢i·ÓªY.­RV ’p q’N=ÍSÿ„Áÿô*hø.‡ÿ‰­#V¤U£&¾eƤ⬛<«þ†ôRî¿ð{mÿÄÑÿÃú)w_ø=¶ÿâk­ñÇ…ìô%/]Öþh>ñì:-Æ¿wmlúPºk§¸ŽÒ™v‚F1µsŽNA>Õ/ü#^ÿ¡âëÿÐÿ…}©xkAÖnãTÑ4ÛéÕ,—V©+É88É'æ©ÿ àÿú4?üCÿÄ×M,L©Ç•~lꥉöqå·âÏÿ„kÁßô<]àÚð£þ¯Ðñuÿƒh½'Ç>] $Ôô¯|<¶ÑmíÃÜK«éí½_qÎ<µÁlu$àg"ºx#J›Ãörø‹ÁÞƒVt-q¦Ÿ ' 2!p$g8$b´úì»~,Óë¯ùSø%ÿ$‡Bÿ·ý(’½«ØØYé–qÙØZAikvCb4\’Np2I?X®#„(¢Š+›Õü&5KQ–{¸Åý•½¬°Hå`ÐÊò+ƒ(d ùŒ ´g  ‚+¤¢€8û?‡Ö:n™yciu¶;ÉmÚàh„‘Ç f„G¹]w! 
¸a$Iüiá[[‰mî_Úî-øÆq¸Œã#§¨­ ðÏÚKhµ/½ØSûfí˸t‹}qM»+™Â<ÒQn×=Oþ¿ÿÐסÿàÆþ*³õßx?[ðö§¤ÿÂc¡ÃöëIm¼ß·BÛ7¡]ØÞ3ŒçóÛü7ýËoü?üMoðß÷-¿ðÿñ5—µÊÎÿ¨Ãþ~Çï>žÐ¼QàýÃÚf“ÿ އ7Øm"¶ó~Ý ïØwcyÆqœd×A¦kº>·æÿdê¶7þN<ϲ\$»3œgi8Î_C_ ý¿ÃܶÿÀcÿÄ×­þÏR[K©x¹íˆØöí]£¤¹ã뚨ÍÉÚÖ1¯†(ó)©zçEV‡!ÏÿÂwàÿúô?üÃÿÅQÿ ߃ÿèkÐÿðcÿ_"éWš$Zl)v°Æwn€±êqÎ=1W>ßá¿î[à1ÿâk'Q§nVwà EIÕŠ¿™îž–ÃEñUÖ±qñ_÷P^Îó^Z¤vÑ™²"ùÆV*dmœŽ•ßAãO Ý\Eooâ]iåp‘ÇüLÎÄàd’xÅ|›öÿ ÿrÛÿÿW´ ­&ox]l!(Öm Ù÷Ìð=¨Uv³L!%Q;yŸ`QE©Âeê^%Ðtk…·Õ5½6Æv@ëÕÒDÅrF@b2ϱªðø?þ†½ÿ0ÿñUâ_沇⅓_„1Þ›Æï:NØ>õçßoðß÷-¿ðÿñ5œ¦ÓµŽÊ8XÔ‡3¨—©ôwŠ|A¤ë1Y↕¡Ü[JdgK‹yÒPT®†qœŽqß…+‚õ xOK¼‚èÚåõì·×w-wAå“*°£ 8Éç=ù×íþþå·þþ&·øoû–ßø øšŸjÿ•šýFóö?yö›«iºÍ»\iz…¥ô åKY–U €pJ’3‚=Å\¯&ýŸ7ðN®ÐãÊ:ÌÅ006ùqcŽÕë5²<ö¬ìÏÿÂwàÿúô?üÃÿÅWA_éWš$Zl)v°Æwn€±êqÎ=1S)r«Úæ´)*²årQõ>²¿ñoõ=:æÂóÄú–·Q<3'ö”Cr0!†CddÒ¸}ÿ ¼9ã˜ßá¿î[à1ÿâk?jÿ•Q‡üýÞ}aü%,‹~(ÑÜ…U]B"I=«z¾5·¼Ñ%¿²KE€NnáÛ¶§ýbçœzf¾Ê­#.e{Xä¯IR—*’— V~§®èú'•ý­ªØØyÙòþ×p‘oÆ3Äg=EhW†~в[E©xEî˜Û7n]äXãëŠmÙ\Îæ’‹v¹êðø?þ†½ÿ0ÿñUËøŸZ±Õu[Í âÖ•¢yQ™Fµàÿ&Ú{™?Óq]Ûå‹ }k۫Ⱦ0]µŒ¼r–WwŒŸoÄqy’¾cŒ|«ßÉö»q2q£6•ô‘Ã…¨éׄӵšs<‹v¥ÿBî¹ÿ€MFíKþ…Ýsÿš½þ˯ú|Yÿ‚Ãþ4ÂYuÿBo‹?ðXƾGë•¿çÒÿÀ±þÞ¯ÿ?WþyæíKþ…Ýsÿš½3à”ë^0ó­§¶“ý 1N…~YzƒÓÖ«ÂYuÿBo‹?ðXƶ¾ݵ÷Œ¼qröWvlÿ`Ì‘yr¦#|ËÛ8Èö"½L§R¥f¥;ߪ<¬ß3©Š ¡9©Yßktg®ÑEôÎh-|4['EÕncù±,¬èß1èG_JÑÝ©л®àWCà_ÏcàÛ dðψo<ÌOg`d‰ó#•³Î3ƒî t_ð–]ЛâÏüñ¯’ÄâêÆ´Ò¦ž¯íyŸc…ÎëS¡*©Y%·dyæíKþ…Ýsÿš¦ÒÚìøÇ¿hÒµDþÛ´Ä—6æ5'Ì{ÿ®÷þ˯ú|Yÿ‚Ãþ5™¨ë“jzÿ„a“@×4ð¾!³6þÌćæ#h9ëÎqè ^ VXˆEÓK^æxìæµl<éÊ¢i®ÇÑTQE}QòGÏ_ ãâ­§Ùì®®ßû3´FFÏ“’oñÅnÔ¿è]×?ð «Ó¾!j2iŸ š=3QÔ xySʰƒÍqþçqéÆ3êETÿ„²ëþ„ßà°ÿ|Þgˆ©Oã'¢ëc鲬֮©Æ¢Jï¥Ï<Ý©л®àQ»Rÿ¡w\ÿÀ&¯Cÿ„²ëþ„ßà°ÿð–]ЛâÏüñ¯?ë•¿çÒÿÀKûz¿üý_ø Ð|Ýÿvµ¾7‰ÿ¶çÝ‹µ”ùqpGb+ÕëÌ>Ln4LÐÍ“Ä7Nb™vºec;Xv#¡µéõö4àŸ‘ñUeÍ9Iõa_h-|4['EÕncù±,¬èß1èG_Jû.¾fð/ˆg±ðm…²xgÄ7Šžf'³°2Dù‘ÊÙçÁ÷¼üÚ¬©ÑN1æ×½º3ÒÊ1sÂ×s„¹n­µú£žÝ©л®àQ»Rÿ¡w\ÿÀ&¯Cÿ„²ëþ„ßà°ÿð–]ЛâÏüñ¯žúåoùô¿ð#èÿ·«ÿÏÕÿ€žwºøÜZyÚ.«mÚà̳ڲ"þõz“ÓÒ¾»¯µßÏ}§%³øgÄ6j÷vÙžòÀÇbt?3gŒãÜŠú&¾‡)«*”[”yuï~ˆùÌßLn4LÐÍ“Ä7Nb™vºec;Xv#¡µëåçRrR‡.îxÙÆcSÆsR³ícÓëŸñßü“ÏÿØ*ëÿE5tÏøïþIç‰ìuÿ¢š½ÓÀ>\ÓÛPþ͵٠ë§’›dŽÍ™X`rpjÎíKþ…Ýsÿš»/øžæßÃz\+á?Î#´‰°éÅ‘ð€nSžAê¥hÿÂYuÿBo‹?ðXƾ:¦.ª›^ÉoüÇÚRÏ+ÆŠª´K¡ç›µ/úuÏüj›Kk³ã ý£JÔmûnÓ\Û˜ÔŸ0p ïþ»ßøK.¿èMñgþ øÖf£®M©ëþ†M\ÓÂø†ÍüÛû3˜ ç¯9Ç 5¾ 
VXˆEÓK^æìæµl<éÊ¢i®ÇÑTQE}QòGÏ_ ãâ­§Ùì®®ßû3´FFÏ“’oñÅnÔ¿è]×?ð «Ó¾!j2iŸ š=3QÔ xySʰƒÍqþçqéÆ3êETÿ„²ëþ„ßà°ÿ|Þgˆ©Oã'¢ëc鲬֮©Æ¢Jï¥Ï<Ý©л®àQ»Rÿ¡w\ÿÀ&¯Cÿ„²ëþ„ßà°ÿð–]ЛâÏüñ¯?ë•¿çÒÿÀKûz¿üý_ø Ûüÿ’C¡ÛÇþ”I^^ðKþI…ÿoúQ%z}¡ðáEPEP_?hÞ-ÖtoÅöºw†µ!^HfûzAµ‹·k)ÏûûWÐ5òïŒeð÷‹|WhšgÚƒë—rïû@LfB1§Óõ®,ÃWGØQ¥íüºôôiþ'.2¶&.|*N^{~hôOøY(ÿ¡ÿ+ñ4ÂÈñGýùX‹ÿ‰¯:ÿ…qÿ@üœüEð³®?èÿ“ƒÿˆ¯ ýNÌ?è^ÿòþLò?´óßù÷½òFÿÄkÚ¿õß }‚Þ_+}Ïö’K³)(PNHñ¯£«ãïøîmgA¹ÓÛHû:Ë·2ý¤>Ü0=6ŒôÅ}ƒ^Ö[–ÖË©:5¨û&Ýí¯d¯«}­ò=L |]jnX¸¥+ôí§›ó ò_‹Z•Ö‘ãßYX}¾â/·ì¶ó„[óó@À$þëUâŸõFѵïê oö†‹í¸‹~ÍÙX‡\uÍu×£*ô¥FærM%ÞêÖÓ¾ÇMiT…9J’¼’võè?þGŠ?èCÿÊÄ_üMð²`9Ƽ³þuÇý?òpñè_5FÖuïj oöv—ìY‹~ý¸YG\ ôÍznCŠËªºÕ°®’j×÷»§mdû_äv`q™•jŽ8¸EFÝ-¾ŸÞ~gµÑEí©óÃÿkÚGôë/ }¾Þ/7eÏö’E¿21?)RF #ð®›þGŠ?èCÿÊÄ_üMy7‡™öPšå¤»þÐ8 chõý*ð¼/ÂÖz˜'\½í>ù5ø‡Çç*ÆéÅEïf¯ÿ¥3êÊ(¢¾ˆöÏñÖ· üj†ëNÒ?µ&¬f´¬TÜ1Ý¹Ï {ûRÂÈñGýùX‹ÿ‰¬oŒëø{âÅ¥ÚYý¨¾†‘lóvc3ÈsœOÖ¹?øY×ôÿÉÁÿÄW…˜pö/[ÛÑÂ:‹no{§¤’ü#Í(ÕäÂÂ.>v¿þ”Eÿ…‘âúÿò±ÿGü,Їÿ•ˆ¿øšó¯øY×ôÿÉÁÿÄQÿ :ãþ€ù8?øŠâÿS³ú¿üŸÿ“9´óßù÷½òG°|¹–÷Dñ5Ôöÿgšo]Hðïå±XÉ]î Æ{צו|º7Þ×.Ì~YŸ\žR›³·tqg¿ZõZúzpp‚ƒVkKv=ø98§-ú…|ãðÿÆÚö‘à}:ÆËÂ_o·‹ÍÙsý¤‘oÌŒOÊT‘‚Hü+èêøûÞ;›FÐm´õÒ>бnÄ¿i »,OM§q\Y–[[1¤¨Ñ£íZw¶½š¾w·ÌäÇWÅѦ¥„Šr¿^Úù¯#Öádx£þ„?ü¬EÿÄÑÿ #Åô!ÿåb/þ&¼ëþuÇý?òpñÂθÿ þNþ"¼_õ;0ÿ¡{ÿÉÿù3ËþÓÏçÜ>õÿɈüm¯júJØÞøKìòÝÚï¹þÒIvbxÈùB‚r@{õ|7ŽæÖ^ÚÁ´³¬·pf_´‡Û‰TôÚ3Óõ{Yn[[.¤èÖ£ì›w¶½’¾­ö·Èõ05ñu©¹b┯ӶžoÌ+É~-jWZGŽ< }eaöûˆ¾ß²ÛÎoÌhÌA“øW­WŠ|wÕF×¼¨-¿Ú/¶â-û7ebpq×5×^Œ«Ò•G™É4—{«[Nû5¥Rå*JòIÛ× ÿøY(ÿ¡ÿ+ñ4ÂÈñGýùX‹ÿ‰¯:ÿ…qÿ@üœüEð³®?èÿ“ƒÿˆ¯šÿS³ú¿üŸÿ“"Oc¤ÙÚ Ì@‘ûX¶¨ÆÞ:U¯øY×ôÿÉÁÿÄWËÔá|ææ° §­ýýòcÀžc©5q·MWÿ$z/ü,Їÿ•ˆ¿øšÆÖ|[¬ëÚß„-u ÿež"³MöôŸs#nÕQŽ 9ö÷®OþuÇý?òpñ¶^1—Ä>-ð­£éŸe ®ZK¿íó‰Æ6_Ò¶Âð¾7 Z5ê`œur÷´ûä×ài‡Çç*ÆéÅEïf¯ÿ¥3êÊ(¢¾ˆöÏñÖ· üj†ëNÒ?µ&¬f´¬TÜ1Ý¹Ï {ûRÂÈñGýùX‹ÿ‰¬oŒëø{âÅ¥ÚYý¨¾†‘lóvc3ÈsœOÖ¹?øY×ôÿÉÁÿÄW…˜pö/[ÛÑÂ:‹no{§¤’ü#Í(ÕäÂÂ.>v¿þ”Eÿ…‘âúÿò±ÿGü,Їÿ•ˆ¿øšó¯øY×ôÿÉÁÿÄQÿ :ãþ€ù8?øŠâÿS³ú¿üŸÿ“9´óßù÷½òG¶üÿ’C¡ÛÇþ”I^^ðKþI…ÿoúQ%z}Qô!EPEP_øK½½ñ¿Š$¶kpƒY»SæÎ|Ãè=ëìJù‘|)}®ø›Å—VºßØQ5ëÈÌeYrCç9$zãÕèåUý†!O]ŸÃkþ'F:•9j¦×—ôŽþýWþzYßMþÂ?ªÿÏK/ûé¿Â½ 
þÖ¯ÿC_þS“ÿŠ£þÖ¯ÿC_þS“ÿН§þÖ]ê}ñÿ3ÓúŽþ}Kï_æy–¥£êº|³Nö¦5ÆBÏP;Šû~¾Rñ‚õ'Â÷—ÓøƒíqE³0ý‰cÝ—Q÷ƒc9ü+êÚùÌßíë©^[}«_wÛ¡çbéR¥5Qi[¯ü¼+öˆ·–ëPð”07Û0_8àD{Wº×ükÓ¦Õ¼Qà«.þÉ,¿nÄÞX“n3÷IÎ1ø×|•á-tkm÷éæaJ1”Ôd®›) {QgËg©…[ÿ„Uÿž–_÷Ó…tþð^£«x^Îú}’)wâ±,›pì>ñaœã?mÿºÕÿèkÿÊrñUö8\ÍB„#zš%³VÛ¦»Å,PR•96×uþgžÿÂ?ªÿÏK/ûé¿Â´<9¥ÞÙxßÂò\µ¹C¬Ú(òËg>`õÕÙºÕÿèkÿÊrñUY¼)}¡x›ÂwWZßÛ‘õë8ÄeX°Kç9úcõž;2ö˜yÂóÕujß=I¯ƒÂB›”)É5ݯó>›¢Š+ä$ùÃãµÅïÄë(íŒAÆŒŒ|Âq:_O­y×ü#ú¯üô²ÿ¾›ü+×¾)h·:ïÅûk[]Cì.š ÈdòD¹áÆ0HõÎ}«þÖ¯ÿC_þS“ÿН¦Ê±þáyîþ­ø³ÓÂápÕ)óUƒoÊßæ=ÿ„Uÿž–_÷Ó…ðê¿óÒËþúoð¯Bÿ…u«ÿÐ×ÿ”äÿâ¨ÿ…u«ÿÐ×ÿ”äÿâ«ÒþÖ]ê}ñÿ3£ê8ùõ/½™Ü~Ïq¼>Õ£©tÖfVÛÓ"8³ŠõªòßVÏgáŸZÉ/œðë×´›vï!"ã¶qœW©WÅU|Ó“ó) {QgËg©…}¿_)xCÁzŽ­á{;è£ÿŸRû×ùž{ÿþ«ÿ=,¿ï¦ÿ ÷¯Ùî7‡Á´r.šÌÊÛzdGq\?ü+­_þ†¿ü§'ÿ^ð*Ùìü3â Y%óžzâ6“nÝä$@œvÎ3Šñ³ŒgÖ!yhþÓ_£g3‡¥éAÇÖߣg©W?ã¿ù'ž%ÿ°U×þŠjè+Ÿñßü“ÏÿØ*ëÿE5xò5®‡©Íi ‘½ GYwÎã½gì« |ç ŸLcÞ°Çf^Ó8^z®­[ç©ðxHSr…9&»µþgÓtQE|‘äŸ8|v³¸½øe±ˆ8Ñ‘˜N1çKéõ¯:ÿ„Uÿž–_÷Ó…z÷Å-ç]ø¿mkk¨}…ÓAY žH— \8Æ ¹ÏµaÿºÕÿèkÿÊrñUôÙV?ØaÔ/=ßÂÕ¿zx\.¥>j°mù[üÑç¿ðê¿óÒËþúoð£þýWþzYßMþè_ð®µúÿòœŸüUð®µúÿòœŸüUz_Ú˽O¾?æt}Gÿ>¥÷¯ó=Kà—ü’ þÞ?ô¢Jô óÿ‚_òHt/ûxÿÒ‰+Ð+áÏ(¢Š(¢Š+ÊåøO®Å«j·zgþá}5ëAý”’ìi8ÜϓۧJõJ)©8»¦Te(»ÅØò¿øV.ÿ¢‹ÿ”H¿øº?áXx»þŠ/þQ"ÿâëÕ(«öÕ?™ýåûz¿ÌþóÈ5?ƒ¾#Ö4ùl/üç[KéýçÈpz^¿E2“–²w"S”äîÅxëÀ·ž.Ô4kû wû&çKóö?Ùû¼Ð ðX€§×¯lWkE$íªm;£ÊÿáXx»þŠ/þQ"ÿâèÿ…aâïú(¿ùD‹ÿ‹¯T¢¯ÛTþg÷š{z¿ÌþóÊÿáXx»þŠ/þQ"ÿâëÀ¾¼ðŽ¡¬ßßë¿Ú×:§‘½þÈ Ûå€Ä†;æ»Z)Jr–’w&U'%i6Š(¨ ò 3àïˆô}>+ ù6ÑgbcFØÉ$ò\ž¤Õ¿øV.ÿ¢‹ÿ”H¿øºõJ+EVkE&h«TJÊOï<¯þ‡‹¿è¢ÿå/þ.ˆ¾ë²êÚUÞ§ã·A§ßCz°e$[Ú6Î7+äddwëÒ½RŠN¬Ú³ljYÉýáETœ‹þê>!ñl^ ÓŸ…‡ü›h³±?±£ld’y.ORk×誌œu‹±Qœ¢ïcÊÿáXx»þŠ/þQ"ÿâèÿ…aâïú(¿ùD‹ÿ‹¯T¢«ÛTþg÷—íêÿ3ûÏ(“á?ˆîü¸ïüyö›e–9ìxÓ~Ç àù¨¯W¢Š™IËY;‘)ÊNòw â¼uà[Ïj5ý†»ý“s¥ùûì‚}ÞhPx,ÀSë×¶+µ¢’vÕ 6Ñåð¬<]ÿEÿ(‘ñt°ñwý_ü¢EÿÅתQWíª3ûÍ=½_æyåð¬<]ÿEÿ(‘ñuÕx Áóx/I¾´¸Õ?´§¼¾’öIþÎ!ùT´1W?Jꨩ”å/‰Ü™Tœ¾&ØV~»¦mø{SÒ|ï'íÖ’Ûy»wlÞ…wc#8Îq‘ZTy5¯ÂoYZCkoñ dF±Æ¿Ø±ª£d¾z —þ‡‹¿è¢ÿå/þ.½RŠÓÚÔþg÷šûj«í?¼ò¿øV.ÿ¢‹ÿ”H¿øº"øO®Ë«iWzŸþÝŸ} êÁý”‘ohÛ8ܯ‘‘‘߯JõJ):³jͱ:Õ³“ûŠ(¨38ü;Ô|CâØ¼A¦x—û&t±L¿`Y÷(‘Ÿ9frGnÝy¬¯øV.ÿ¢‹ÿ”H¿øºõJ*ÕIÅY2ãVqVŒš<¯þ‡‹¿è¢ÿå/þ.øV.ÿ¢‹ÿ”H¿øºõJ)ûjŸÌþò½½_æyÏø'Ãð‡xBÇ@ûgÛ>Ëæ~ÿÊò÷n‘Ÿîäã±×µtQY™Q@ÿÙhpcc-1.4.1/hpl/www/rollM.jpg0000644000000000000000000015624211256503657012543 
00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀã¹"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷úËÐ|C¦ø–ÂKÝ.æ9àŽâ[vduo™®~RF‡ª²žõ©\;øCZ¸±6ÃR‚ÇËÔ/n¡šìù±Ï1•wfƒ 7AÞ8È#$P¦¿ãKÞµ´öZ•ȆÜ]ÞMioæ%œö $9p¡›¹Æk¤¯;Õ¼'¬Úèš”)©é­þ˜,oﯥE¬*ó3Ê¢G‘œìÆU Pàí^“þ¿ÿÐסÿàÆþ*€: +Ÿÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*€: +ÇÅžÔï#³°ñ•wu&vCìr;`p äà Ø Š( Š( Š( Š*½õýž™g%åýÜ–±ã|ÓÈ#EÉe$øÐŠ+Ÿÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*€: +Ÿÿ„ïÁÿô5èø1‡ÿŠ«šo‰tfá­ô½oM¾P»Gkt’°\’“Œ3î(RŠ( ŠÃŸÆžµ¸–Þãĺ43Äå$ŽKø•‘ÁÈ ñŠþ¿ÿÐסÿàÆþ*€: +Ÿÿ„ïÁÿô5èø1‡ÿŠ£þ¿ÿÐסÿàÆþ*€: *8'†êÞ+‹ycš P [xWHƒT¶ÑŽ¡”)tdÑYØÊËyGqÝžrsQ8su:pØ`ÛåNýÏ/ÿ‰ýCñÊ?âEÿPßür½“þ/Ÿóë¡ÿà‰¿øÍð™| ÿŸ]ÿMÿÆk?aæuÿiÿÓ´yo„u¿øsâ>«\\[[ØÁö>X~Ýв®BO'+Þ4?‹Þ ×®¯¡‹VŠÍ- 5ü‰n—³Ì{Ø1ÆÞr2=kÎo×ᯎ¾#øCIÐ,¬e±¶ý¾+K7³ÝˆCG’ ÁV<9õçÑ´?„> Ðn¯¦‹IŠñ.Ê üip–ûsÄ{Ô°ÎîrNp=+XÇ•Xà¯WÚÔsµ®mÂwàÿúô?üÃÿÅQÿ ߃ÿèkÐÿðcÿGü žÿ¡SCÿÁt?üMð‚x?þ…MÿÐÿñ5FAÿ ߃ÿèkÐÿðcÿGü'~ÿ¡¯CÿÁŒ?üUð‚x?þ…MÿÐÿñ4 àÿú4?üCÿÄÐÿ ߃ÿèkÐÿðcÿGü'~ÿ¡¯CÿÁŒ?üUð‚x?þ…MÿÐÿñ4 àÿú4?üCÿÄÐ{ÿˆ¾±Ó®o?á#Ò®<ˆž_&Þú’M ¨»¹cŒÜ×—üDøµá|-Ô¬,/'Rºò¶YÏnáÆÙМ°>ê“÷¿^+Ô/þx>ûN¹³ÿ„sJ·óâx¼ë{RH÷7#má†rc^_ñá/„|#ð·R¿°³žMJ×ÊÙy=×;§@r „û¬GÝýy ãþ-ýHßù)GüZú‘¿òR¾oû‡¹oÿ€Çÿ‰£íþþå¿þþ&¸þµ/ùöÏ£þÁ¡ÿAtþõþgÒñh?êFÿÉJÇðßü#ð½dÿ„_û+ì?ðÿÙž_—æ}¤g>_±·ß¯û‡¹oÿ€Çÿ‰¯Bø5”ßïZÄ ˆhÎÄÚ7yÑöÀö­)×”ågŽLnWK KÚF¼&û'¯æ}EWAãž/àÿø@ÿ´|cÿ Gü#Ÿnÿ„–÷göŸ‘æyy\cÌçnw{g5ÔÅ ÿ©ÿ%+ƒÐuÿ†úV³âø¾µ¹ÿ —ÀÏùõÐÿðDßüf€:ø´õ#ä¥cø³þgü!ºçöwü!¿nþϸû?ÙþËæyž[mÙŽwgÇ9ªÿð™| ÿŸ]ÿMÿÆk/ľ,ø5sá]^ .ÛF„–S%©Eda)B å §v9ÈÅz§?äžxkþÁV¿ú)k ®ÀŸòO<5ÿ`«_ýµÐPŸümÿ’C®ÿÛ¿þ”GGüZú‘¿òR¿òHußûwÿÒˆëŸÿ„Ëàgüúèø"oþ3@üZú‘¿òRø´õ#ä¥sÿð™| 
ÿŸ]ÿMÿÆhÿ„Ëàgüúèø"oþ3@ÿð‡ÿÂÞð'ü"ŸØóûOöW“ÿ>ÿ.ÿ/þŒû×°W‡Ùk>Õþ/x'þ¸¬cò¾Ýö¿²XlæßäÎQwt\së^á@xüð‡ÿÂÞñßü%Øóû7ö¯“ÿ>ÿ6Ï3þœ{W°W‡Þë>Ò>/xÛþH¬dó~ÃöOµØœbßçÆ¶õOLñé@‡üZú‘¿òRø´õ#ä¥sÿð™| ÿŸ]ÿMÿÆhÿ„Ëàgüúèø"oþ3@ºþ›ÿŒÐAÿƒþ¤oü”®?â—ü+øW·öü"ŸÚ¹ò~Áö;ýrnÛ³æû»³ŽÙ­øL¾Ï®‡ÿ‚&ÿã5Ì|Dñ/ÂCÀ𕝆­ô¤ÕßÊû9ƒIh\bT-‡1Œ|¡»óÒ€>‡¢Š(¢Š(¢Š(Ÿñßü“ÏÿØ*ëÿE5pþø“öèvð…xÊãÈÓíâó­ô­ñɶ5‘·r§¸®ãÇòOwÓ5‹­?^ñtPøw^Ô”ø†ñŒÚ}‘š5;€ÚH?{ŒãЊÕÿ„šÿþ„à©¿Æ­ø?ÆŸðŽj>1³ÿ„gÄz¦ÿÞËçi–|k’£imà ÆqèG­uð´ÿêBñÏþ ÿû:òkä¸ZõI§wævÓÇÖ§ìŽ3þkÿúǧ\Ý ã%òby3q¦ùQŒ~wÜv¯¶85©áo h:Ï|1qªhšmôë£Ú"Éuj’°_)N`N2Iǹ®Âx!º·–Þâ(æ‚T)$r(eu#x Ž1^ÑÀ|Ñã/ŽPøÏÁš¦ƒ.%‹Ü¤f)–èJ7,¨øa±p6«r3Î8ç#Ö?ámEÿB?ðR?øº¥ñkBÑôO„>"þÉÒ¬l<ï³yŸd·H·âá1 g=}M7þ¿ô"x×ÿÿН+3Åã0üŸU¥Ï{ßËk}úšB1±þÔ_ô#ø×ÿ#ÿ‹£þÔ_ô#ø×ÿ#ÿ‹ªð°åÿ¡Æ¿ø(?üUð°åÿ¡Æ¿ø(?üUy?ÚÙÇý—ìéÿ1BO§Š~/xn‡­éfþÐÏö¥§‘æn·þNq·ŸLZöñhüDú÷Åïîе½+Éû~?µ-<36ÿÁÉÎ1ϦG­{M}­jÔ#:ð哽×mÈÊI'dãðx«þŸ‹Þ;ÿ‰¹ªý£û?þAV~•¶ßøùÎî=p}+Ø+ÆO­|ñ{Ç?iÒµ{ÿµ}ƒoöu¸—fÛ~we†3»¡®©IE]²%%y;#¦ÿ…§ÿRŽðOÿÙÑÿ Oþ¤/ÿàŸÿ³ª_ð»4¿ú<[ÿ‚åÿã”ÂìÒÿèTñoþ —ÿŽV~Ú—ó/¼Ëë}è©âω?nðn¹gÿWŒ­üý>â/:ãJÙ{£a¹Ûw 3’{ î< ÿ$óÃ_ö µÿÑK^y⟋Úv§á jÁ<3âx^æÂxVI¬Q FÃ,wðrO¥z?äžxkþÁV¿ú)jã8Ëáw.!?…Üè+Íþ1]}‡NðçÙç¸ò úVÇü-?ú¼sÿ‚þαü7¯ÿÂGñÖKÏìWKÙᣓ©Ûy6.AÜ'+Î3ê¥zÅQ@/àÿÂ9¨øÆÏþŸê›üK{/¦Xyñ®J¥· 7Ç¡µÔÂÓÿ© Ç?ø'ÿìë—ð?áÔ|cgÿψõMþ%½—ÎÓ,<ø×%FÒÛ†ŒãÐZê?áiÿÔ…ãŸüÿötÂÓÿ© Ç?ø'ÿìëø¥ãÿí¿‡¶ÿ—Šì<ï'ý&ÿMò¡LLó6ãŒãÜŠì?áiÿÔ…ãŸüÿöuÇüRñÿößÃ[Nÿ„KÅvw“þ“¦ùP¦&Fù›qÆqîE{…Q@Q@Q@ÿŽÿäžx—þÁW_ú)«‡ðŸÄŸ°ø7C³ÿ„+ÆWFŸoo¥oŽM±¨Ü»•8È=Åw;ÿ’yâ_û]覮WÁòøüx'Av~kQ§[ù&k¹Ã”ò×nà# c8&€-ÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÖ‡ñþ||)ÿ—üj;â?üøøSÿ.?øÕqóø«þo‹Þÿ‰¹¥}ŸûCþB¶~G›ºßø99ÆÞ}2=kØ+Éõñ#ü^ðü$iQcûCÈþÏšI3þŽ7nÞ«áÆ3Þ½b€ (¢€ (¢€ (¢€ óÿ¿òHußûwÿÒˆëÐ+Ïþ6ÿÉ!×íßÿJ# þŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þδ<ïˆÿóãáOü ¸ÿãTyßÿçÇŸøqÿƨ?þŸýH^9ÿÁ?ÿgXþ×ÿá#øë%çöF«¥ìðÑ‹ÉÔí¼‰ î “•çõÒº;â?üøøSÿ.?øÕsº#kmñåνŸ×ü#`°•Ý6}¨c%ÕNsžÞ”êTQEx¿ƒüiÿæ£ã?øF|Gªoñ-ì¾v™açÆ¹*6–Ü0Üg„z×Qÿ Oþ¤/ÿàŸÿ³¬IâÕ½ñÐm´Imá'½Þoî%Gß•Î# cýk®ó¾#ÿÏ…?ð2ãÿPü-?ú¼sÿ‚þαüYñ'íÞ ×,ÿá ñ•¿Ÿ§ÜEç\i[#tl7;náFrOa]Gñþ||)ÿ—üj±|a/Ï‚uáyg᥵:uÇœa»œ¸O-·m0 Æq’(ªð'ü“Ï Ø*×ÿE-tÏøþI熿ìkÿ¢–º óÿ¿òHußûwÿÒˆèÿ…§ÿRŽðOÿÙÑñ·þI»ÿnÿúQhyßÿçÇŸøqÿƨ?þŸýH^9ÿÁ?ÿgGü-?ú¼sÿ‚þδ<ïˆÿóãáOü ¸ÿãTyßÿçÇŸøqÿƨŸÅ_ð“|^ð'üHuÍ+ìÿÚò³ò<ÝÖÿÁÉÎ6óé‘ë^Á^O¨?‰â÷€á 
ƒJ‹ÚGö|ÒIŸôq»võ\1žõëWÏþ!¼º´ø½ã³hš¾§»ì[¿³­LÞ^-Æ7c¦{zà×Ðäú{ø‘>/xûþø4©sýŸçÿhM$xÿG;vìVÏñg8íXס ôÝ9ìÌ1XjxšNMŸùÜå¿¶uOú¼[ÿ‚¦ÿ?¶uOú¼[ÿ‚¦ÿõ;â?üøøSÿ.?øÕwÄùñð§þ\ñªó¿°ðŸÞyêæ³ûÏ×umFOjHþñ<Ö²†šm5•#ÌÇ<ÔŸJ«Ç­{AÐtmÃÃö–Ïee N÷Åä3(6:¨Ù´ùº¶C _Qñ„¾?> ׅ埆–Ôé×q†îrá<¶Ý´À'ÆH®‹ÂVzŸÃÙßÚAwk&•i¾ã#b4#*x8 »p¸:XTãO©èà°pQq£ÔãüSñŠóLðåÝ凃|GiuÍ“jÚaŽÙrêö‘HäWš_|eÔüM‰&µ¢¢Ã¥kÖ·ÒÝX µCb-¬HÞ@r q¸Ç×ÓwÖzœ–wö]ÚÉðÏ‘•<á^kñ×O›VðΦÛ4k=Þ½ofBB†t•FHã'Ð×d\’–ÇiWþ#¿ôñþEÿÇ(ÿ†ˆð¯ý2ÿŸíÿ&ÿãU¹ð·ÂŸ‚¾/Ü麬¶’Ï.‚Ó©µveÚny,ªs•=½+—C.…+᪹K³V\ÛÕïEW’hx¿ƒüiÿæ£ã?øF|Gªoñ-ì¾v™açÆ¹*6–Ü0Üg„z×Qÿ Oþ¤/ÿàŸÿ³¯:Ò¼¬øW]ñuާØ\C'ˆo&/q+«,0 ñ…­ÿ “ÅôÑÿð"_þ&»iåØª‘S„Lê† 8©F ¦uÿð´ÿêBñÏþ ÿû:ãþ)xÿûoáÆ­§Â%â»;ÉÿI¿Ó|¨S#|͸ã8À÷"—þ'Š?è £ÿàD¿üMs~=ø—¯xƒÁZ†—{¥é°ÛÏåï’œm‘X`Ž Sž[‹„\¥M¤‡,&1r”‘ôQ\' QEQEyž‰¯^è°Ù‰5É?¶5$œÈ²––å¸"ÖÖ@ ߆ ÷þ_—@;é³k>Õô»vg½²šÞ6¡ ‚p ÆO¡®IÓ~,èÚ5Ž—nÞ h,­ã·¤7EŠ¢…àœAZ]W]ðF¿p.í.ö{w²L ÇÙ®Hc©Þ6ðÛáÈù†Ó]åy½ý÷Å}3N¹¿¼“ÀÑÚÚÄóLûoÔPKN=+Ã^>ñÇ‹î'·ÐµoÝO’3ôm´œdPHÎ#8ÈÏQ]ÇĽûÅ5}LXÚòt¢Gm¡ÊH¯·=!HÀÉ s\¿‚ä¾×>%®º<-©h:}¯‡N0Þ[ù*’ùÁÂD8ÜA€1ŽBär×Ã~<ÔüwáísIJxqmt´á4Öœ;y±lèã/qß­zEPEPEPEP\¿ÄO Þx»Àš–‡a$Ý]y[v!ÙQÎHôSÚºŠ(ÏÿâïÿÔÿ“uOUÕ~(èz]Ƨ©ÜøÚÎÝ7Ë+ý¯ ?™$à9$€2Mzecø©íâð¾£-Ö‘ý±D\iÿg3}¡ÁÊ&Эն󃷩àPøkÆñZø¨j§_ˆRKÝ„‹}ÕSÔîÝ‚‹’¹u¦­¦üYÖtkí.á¼°^ÛÉo#Fnƒu*HÈ#8>†» i³hÞÒ4»†§²²†ÞFŒ’¥‘’2ÆG ­J(®?â†õéÚ?ö–)}¦ê°ê öæqyaðÀIäŽ8ã<×aEyÿü]ÿú‘¿òn©êº¯ÅK¸Ôõ;ŸÛYÛ¦ùeµáGó$œ$I¯L¯?ø¿á»¯xJÑ-¬ä¾ŽÇPŠòæÒÛ,𪺺Çêøn±ÆNËÐXj:g‚æ³¼´¾´³PŸû2øÖ9ÖÔWÌQÑ‹'>¼|¸¯@ Š( ÔþxÒ_ë7¶šÙ¯µ îãË6ðÉá1œc×ëUÿáPü@ÿŸÏ ÿßÙÿøŠ÷ú+¶žcЧM¤Ž¨cqŠŒfÒGÉæ]3Q¹°¼ñG†cºµ•á™<‹ãµÔÃ"zWa}ðWÇ—örZË{á°Œ”–px ÿÏ?jõ=Q5­Oâ–•§2Hž²²:Œ­öbÑÜÜï(‘³žL¬ª:åsŒ…+ÚSže‹œ\eQ´Ç,v&Qq”Ý™ó…Ïü/?øHt/í/·ÇÚù~Vß³}ôÏÚ>Íÿ,úgwmØï^¯aÿ OûFÛûGþß°ù©ö³ý«Ìòò7lϱœgŒ×qEpœEP^w?„þ$­¼­oñ29'Li&‡+68†HïƒC^‰E|É£ø3âç„ôkÍJ=RMÏH·’ámf¾ÆëµÙÂD»Ðž§æ–sÈm·Ä?ˆw°Ïÿ nß1ñý›nq‘Ÿî×¾øïþIç‰ìuÿ¢š¾V±±wÓí˜_ݨ1)Ú¬¸ψ«ìÒw±ìdù{ÆÎQQæ²ïc±ÿ„÷âý ÿùM·ÿâhÿ„÷âý ÿùM·ÿâk–þΓþ‚7¿÷Úÿñ4gIÿAßûíøšåúâþoÀ÷¿Õ¹ϯü›þ è¾ñ׌¯~#èzN­â¶ØÞyþd_c†<ì…˜r«ž j÷ÊùáÅ»Añ{îf›wÚ¿ÖqþŽý0}A]´gÏ#å³3ÃbeE«ZÚoÑ0¢Š+Sˆ(¢Š(¢Š+ø¥­ê>øq«jÚMÇÙï ò|¹v+íÝ2)á‚G"» óÿ¿òHußûwÿÒˆèÉÿá=ø‡ÿCþSmÿøš?á=ø‡ÿCþSmÿøšå¿³¤ÿ ïýö¿üMÙÒÐF÷þû_þ&¼ß®/æü¶ÿVåÿ>¿òoø'Sÿ 
ïÄ?úÿò›oÿÄ×_ð«Æ^*Ö¼ys¤ëš×ÛíWLk•_²ÅJŠQAèO~õäÿÙÒÐF÷þû_þ&»Ÿ‚p˜>)Þ+M,Äè®wHA?ëãã€+Zi>[žvi“K ‡u];jºßõ>ˆ¢³ôÍwGÖüßìVÆÿÉÇ™öK„—fsŒí'Áëèi·Þ!Ñ4»Èìõ cO´º”Ž ‹”Ü@!Iä‚? í>hóxïÅÚ‰&³ñ×ö]­†·ua ¿öLá# ¯Ìpz0ç§^kcþ/ˆôSÿòoþ5ƒào M¬^øÊâ?ëzh_Þ§“a4jÊÄ21Ï8ëØW]ÿ-×ý+ÿÀ˜?øÍgÿÂ%ñþŠþP-ÿƳõÝâ‰áíOVÿ…“ç}†Ò[Ÿ+û ÝwìBÛs“ŒãÁ®ƒþ[¯úòõÍWKò³óýŸ"'™›s÷·#gãêhcþ/ˆôSÿòoþ4Â%ñþŠþP-ÿÆ´?áºÿ¡ãÅøÿ£þ[¯úÞiŸh£RÇ’OJã|aàË›_ë× ãÜ´ë‡0Íq I1¬@àô8"º¯É<ð×ý‚­ôRÐA\?ÄÝ[YÓ4íRþκԵ»{¸òm© p~W8 Ý:×q^sñzueáuž[s/‰ìÐM ãÈnRAFA  ¿áø‡ÿE?ÿ(ÿãGü"_ÿè§ÿåßükCþ[¯úøªjöº¶³<ÖþVØîf£mÒ¢œ=×­%—ÕŽupN-F›O¦¿ðO|¢Š+cÏ (¢€ (¢€9ÿÿÉ<ñ/ý‚®¿ôSWšxcà–…«xOFÔfÖüA·v0NéÒRñ« Æp9ãšô¿ÿÉ<ñ/ý‚®¿ôSWá?„Ôü¡ßÞh^eÕÖŸo4Ïö¹ÆçhÔ±À| ’zRi=Ë…IÓÖ ¯Aßð ¼=ÿAÿÿàdünøP^ÿ ÿ‰ð2?þ7Zßð¤¾нÿ“·ürøR_?è^ÿÉÛþ9K’=>µ_ùßÞÎRéÞø½à°_êWlû~ÿ·L²mÙoÆÜ(Æwý{]xüþ ð/xûOûÚ¿´<ïßI&í¶ÿ/ßcŒnnžµìÒKDe)JošNì(¢Šd…Q@Q@yÿÆßù$:ïý»ÿéDuèçÿäë¿öïÿ¥ÐOü(/ÐÄ¿øÿ£þ‡¿è?â_ü ÿÖ·ü)/‡Ÿô/äíÇÿ£þ—ÃÏú¿òvãÿŽTòG±¿Ö«ÿ;ûÙ“ÿ Ãßôñ/þGÿÆë½ø3£ÜüM“à «j¾Wö ¿ŠâyGY¼ï,òŒ ë´`Ÿï ôøR_?è^ÿÉÛþ9Xþð¶á޲Xhvdµ“ÃFfO5äË› Ë’z(ü¨QŠÙ:õf­)6½J~øƒq=ÃxÃYŽw@‰&–E›ÎX1Ë–…8ã§~Ñø“öþÞÔcºÿ„ÓU—dB<êköÉ8$ðû“ ÏÝÇ\œó^ÑEQ‘ó_…¼-%´¾ °°ñ/ˆl-¬u››Xã³¾òƒ„Ú0ÈÆOt?ðŒßÿÐïãü·øS|7à? 
x£Vñ…ö³¦ý¦æ?Þ¯çȘ@À…`:±üë ÿ…=à?úäÜÿü]|6cœ*8©ÓöÓV{(¦¿ô¥ùSPW¦ŸÏþƒÿÍÿýþ0ÿÁ«…føƒÃ×°xkU•üaâ©Õ,æc ú›4r„íaŽTô#Ò»øSÞÿ þMÏÿÅÖO‰þø/Nðž³}k£yw6Ö3ÍýªcµÖ6 ྠu®j9â•HÇÛOV¾Ìù2å:Vº_üÒü ÿ$óÃ_ö µÿÑK]sþÿ’yá¯ûZÿ襮‚¿B<“Ïþ6ÿÉ!×íßÿJ#®3þ›ÿúüaÿƒVÿ ìþ6ÿÉ!×íßÿJ#¬¯øSÞÿ þMÏÿÅ×Îq=a=Ÿ¿(Þÿ Nö¶÷hì¸+óAKæ`ÿÂ3ÿC¿Œ?ðjßáGü#7ÿô;øÃÿ­þ½ÿ {Àôÿɹÿøº?áOxþ€_ù7?ÿ_9ý¼¿çýOü?ü™×ÏKþ}/¿þÍèúeÆñ{Áž~·¬j~gÛ±ý¥vfòñn~îGÏ>¸•ï5á³øw¾ø¥à»Ë8àÒ­eûwÚfžé¶q-#9r;g5êÞñ†ã yçÐu(ï (ÈÈHÈʰœ`àúû\¦¿Ö0p«Ìå{êÕžíl›üÏ:»N£åV]Êñø<+ÿ 7ÅïÿÄû\Ò¾ÏýŸÿ «Ï#ÍÝoü|ão™>µÞj^?ð¦âеrÒ×P(Ç)*¨$o|lB@È A9£>w¥ø_Â?>)xâòþ5kX~Áöi »}œÀCá£`(|cë^‰‰Ô¬ÿ©÷Ç?ø8ÿì(ÿ…YÿSïŽðqÿØQÿ Káçý ßù;qÿÇ(ÿ…%ðóþ…ïü¸ÿã”âφßaðn¹yÿ ¯Œ®<>â_&ãUß›ccµ×o*q‚;Šî< ÿ$óÃ_ö µÿÑK\?‹>xLðn¹g¡ywVº}ÄпÚç;]cb§ðp@ë]Ç?äžxkþÁV¿ú)h ¯7øÅköí;ÂvhžßÏñ-œ^u»ì’=ÂA¹³ äƽ"¼ßã¾§§xNÂò?2ÖëĶpÌ›ˆÜŒ$ 29ô  ð«?ê}ñÏþ?û ?áVÔûãŸüöÂ’øyÿB÷þNÜñÊ?áI|<ÿ¡{ÿ'n?øåð«?ê}ñÏþ?û ÇðÞÿçÇY,ÿµõ]S†Œ¾v§sçȹ¹hl /Ç©>µ±ÿ Káçý ßù;qÿÇ+Ã~Ѽ#ñÖK Ïì–²xhÌÉæ¼™sr9rOE•zÅQ@/àÿÿÂG¨øÆóþoé{µîQEQEQEWŸümÿ’C®ÿÛ¿þ”G^^ñ·þI»ÿnÿúQgÿ°øIÿ>–?ø5—ÿŽÑÿ Ãá'üúXÿàÖ_þ;Yÿð™| ÿŸ]ÿMÿÆhÿ„Ëàgüúèø"oþ3@ð¬>Ï¥þ eÿãµGÂZ‡|=ñÂ[O ÇvOá³+ˆ®aæ•–f=ñšoü&_?ç×CÿÁñšÁZŸ…5_“OàøíO_2H-m ºù¿iRr¥W'i^qéé@ÉEP†øcÁ¾ñ«ã ¿Am%êx’ö$2Þ¼'Ë á]GRÜâº/øV ?çÒÇÿ²ÿñÚäôᾕ¬ø¾C¦¾ Þ#½xÍÖšnÊÜÃl Á¸Ï¯­nÂeð3þ}t?ü7ÿ  øV ?çÒÇÿ²ÿñÚÉñOï…ö>Ö®ôû[5½‚ÂymÊêr1,lWÈAä1SÂeð3þ}t?ü7ÿ¬¿ø³àÕÏ…ux4»mjYL–¦=‘„¥L7”6Øç#êžÿ’yá¯ûZÿ襮‚¹ÿÉ<ð×ý‚­ôR×A@ñ·þI»ÿnÿúQgÿ°øIÿ>–?ø5—ÿŽÖ‡Æßù$:ïý»ÿéDuÏÿÂeð3þ}t?ü7ÿ  øV ?çÒÇÿ²ÿñÚ?áX|$ÿŸKüËÿÇk?þ/Ÿóë¡ÿà‰¿øÍð™| ÿŸ]ÿMÿÆhŸñ7ÃOÝxïÂ:Š>Ïk¨ý³íexeÝÄ9ráyÏnyü:ö~ðŽ›öÏí_?WóeÍ¿›#ÃäGÙvÃssË…^se¬øWø½àŸøBâ±ÊûwÚþÉ`m³›“9EÝÑýqÏ­{…xþ­û<øjÿ\‚æÊê}3LX¶MgçyæùIJ3mê¼m?wß#Îæð'†4ø‹C¹/=­Ù¾Î÷w_ç‹{d®Ðy>ž•õ%|“ñ‰àO‹Þ ó‚óö|esÿ.ñÕCõÙ}_Úû>o´ôµµòÞÖùœ¸ÊR«BP„œ[¶«}ÍïøD<ÿ<-ÿð1ÿøº?áðOüð·ÿÀÇÿâëʼë/Dÿ¾?úÔyÖ^‰ÿ|õ«£ýMÿ©œð/þØð¿²±_ô?ÇüÏHÖ<-á }þ{h`ÛHñväî Hãw<×оÿ’yá¯ûZÿ襯Œ$–ÐÄáBn*qòwü«ìÿÉ<ð×ý‚­ôRÖ52Ÿì×Éõ…ZúÝ;ÛËvzÙv¥ÉT¨ç~ÿðìè+Ë>;CÇ…ô.@6òkÖé(-´¤ óÛŠõ:ò/Ú(¨ðš[î^,ý<©j=£ä½¯¥ûyóMŤìpŸðˆx'þx[ÿàcÿñtÂ!àŸùáoÿÿÅוyÖ^‰ÿ|õ¨ó¬½þøÿëWOú›ÿS8ÿà_ý±ó_ÙX¯úŸãþgªÿÂ!àŸùáoÿÿÅÖÿ­7KÒþ/\Á¤¢-»h,ìRãwž€òIìx_eèŸ÷ÇÿZ½Sö{h›â=ñ‹²$è1ÏENú‚öÿ]Uzr§}úîö;p8ôks䤻;Ûó>™¢Š+Ú<7à ð7ˆu_]øš i/SÄ—±!–õá>X`G ê:–çѰøIÿ>–?ø5—ÿŽ×' ëÿ 
ô­gÅðxÂ5õñëÆn´ÓpÞVà#`n Æ}}ksþ/Ÿóë¡ÿà‰¿øÍh°øIÿ>–?ø5—ÿŽ×'ñ/Àôo‡Ú¦¡¡[Ú&¥•ä´z„’°Ì¨­….AùIí[Ÿð™| ÿŸ]ÿMÿÆk˜ø‰â_…:‡5+_ [éI«¿•ös’иĨ[cùCwç¥}EPEPEÉèþ8²}$ÝøŠ{MF½»·†;Û˜¢ó)™2?xÀVçïGÊT ž;ÿ’yâ_û]覯–,® NTÒwïÿ޼&2¦NPK^çÈð’éó÷ÿßü(ÿ„—HÿŸ¿ü†ÿá_GüSÖo4†zÞ£`þ]Òı$€SÌuŒ²A ’bs~ ŽûCø–ºñN¥¯i÷^MDÍyqç+Ëççj$€ Îy-€G¥þ±â¿–?sÿ3»ûsü«ñÿ3Í>êVšÅï }–o3ËûVfÝñÔ{ú~Š+ÇÅbg‰ªëOwÛÒÇ™ˆ¯*õInŠ(®s¢Š(¢Š(¯?øÛÿ$‡]ÿ·ý(޽Šóÿø]¿?èaÿÉ+þ7Gü.߇Ÿô0ÿä•Çÿ¯@¬ZÜ^ø_Q·µÖ¿±$h‰:ŽÐ~΀åÛ’1ò†²6ç äP/ÿ ·áçý ?ù%qÿÆëÃ~)Ѽ]ñÖKýóív±øhÂÏå6œìÇR+“ø¨é×wŠlìu ï|½n[…šãq’XdG#±,Þ[ß#2(¼þ6ðïŒ~/xûPûgÙ´<ïÜÉÝÖÿ/ßQœínž•ìQ@xü6ð/xïûPûÚ¿³üŸÜÉ&í¶ÿ7ÜSŒn^¾µìPŸÿÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝz|ÿñ.Ùø¿Xñ kz­·„¬îìâ¹NÕ6ÈˆÑÆda°­’8`2dVÁÔx³âÿ5?ë–zï™uu§ÜC }’q¹Ú6 2S$޵ÜxþI熿ìkÿ¢–º (¯7øÅ}o¦iÞ¿¼“˵µñ-œÓ>Òv¢‰ N=+Ò( ?ÿ…Ûðóþ†ü’¸ÿãtÂíøyÿCþI\ñºô óÿ‹úÞ³£øJÒ æ;[ÝSP‹N ÛLBEs•oà?(»HÁÁü.߇Ÿô0ÿä•Çÿ®Rø½áK‹‹â;y®õ <èBĵ¬XKç—Álãhëî=ñÞ|¿Ônü5®±w}sªéÚ„öW†òE¤ŠA*®2Y@aËs¸´-v𖓦ë6ëoªiö—Ð+‡Xî¡YT6È Î ÷4å~ý ü;ª}«û~ì?/g“óÉsçg;¾äcn0½zîö§jŸ´/…ì5¸­í-îu-5 Þ÷–À«¤™#g— \ðÝ»¿N+д踿ŸDÓc²{÷p#vÚÄ# I æá@û n©à¿ ëzÜZÆ«£Û_^ÅÙÑ®A‘dœl'aå8Ï=hå£â.çYÖï̲Cö§=ÔI$gpGlŒíÈÏãRÂK¤ÏßþCð¯®à‚­íâŽ"@‘Ç…TP0€ã%{X|÷B”iF1²òæz”sjôiªqJË×üÏ?á%Ò?çïÿ!¿øV~¹®iך<ðAq¾VÛ…ØÃ8`{j÷?è 7ÄMÏÅþ5MCSw¾-ïØ[Ù[e‰ à3-p¤‚s€¯ë•UsüMZr§(ÆÍ5³ëó*¦q^¤VjÝÌ(¢ŠðÏ$(¢Š+ŸÂKÉ}ª´mwqt‚+ù-¼³3ùŽ¿¹)¹wFýÄdóŠè( U𵯇ÿ´|Q»®@Öš{ØÐ\È t¥î#w9bÇ—äàg  dÅñÇBš$–=\(êNË~Aÿ¶Õ×xïþIç‰ìuÿ¢š¾S±½Ô’ÂÙSJÞ‚% ÿhQ¸`sŽÕ•Zœ‹§ÌïÀa&M5'oåMþIžù?Æ_][Ëoqáífh%B’G$VÌ®¤`‚ Ø Ž1\ÿ†¼_ð÷ÂÜh^ ÖmgI!1HÛAÎyÉ8$ g=yWÛõOúÿäÊÿ…oÕ?èÿ“+þ‡Öu÷¯ó=?ìhÿ%Oü_ü‰ô>…ñkF×¼Gc¡Ã¦ê¶÷W¾g”ó¤[>D.rVF#é]õ|ƒ£jÞ$³ñž‘}£è±Ëª[¥ËÛÛ»y¢Qå6þ©$.â9'g¡ôÏ|Eø™©jÚä7^mNh %¬šhôÿ°î @ă{nIÆ=릜¹¢™ãbèª4š·ug·f‘îTWŸÿÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øUœÇ Q^ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿá@Eyÿü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…zcø§Ä–~ðåÞ¹òZÚìÞ(.w: À$¬;×®ø×⧇µ;ŸøWß`òm%“íŸÛ6òýŸO™³oÍ·®ÞøÅy·ñâˆþOi«i?hÑ§Ûæjÿ`‘7m˜ó®#0 Âûuæ€=þn‰ÿ@=sþø·ÿãÕOUø³ákK¸Óoô=¥Êl•¢Œ²÷’ppzžFAàšñ/·êŸôÿÉ•ÿ >ߪÐÿ&Wü+ëºû×ùŸEýä©ÿ€Ëÿ‘=gÃ6ðƒ¾Õýá=rÏí[<ïž97mÎß¿pqÍÓÖ» üLÒ|[¯K£ZXjV×QÚ›¢n’0¥*ðQÛœ°ü|ÝöýSþ€ÿù2¿á]çÁ®&ø¥x×ßgq¢¸ æÈóãç"´§YÎVºûÿàœ¸Üº8z\ê3^±i~)DÑEÐxç'eãí[J-/ÃzÍêiײØÍ,mj‹æÆFà7̤ŽAÎ;ÕÏøHuOú5ÏûýeÿÉãzGÄËßkž.Ó­ü9ý¤’x†òs7Û„;I`»v”9û¹Ï½lÂúÕ?èGÿʲÿñºÍÕ¦œ—ÞuÃ/ÅÔŠœ)I§ÕEÛò: 
ºŠ5FÔõ†zÌ÷ŒU¼¶ˆ¸7lºœq“ή‚K¡hÒÎþ Ömtû rì#k-±D‹žn:íÅyÿü/­Sþ„ü«/ÿ¬Ï|jÔµ_ êÚsø;ìéwg4 7öš¿– îÛåŒã9ÅÚŸó/¼§–ãR»£/üÿ‘îzN¥³£Xê–ë"Á{oÄk ‚º†à‘œSW+Ÿð'ü“Ï Ø*×ÿE-t¡Äcø§Ä–~ðåÞ¹òZÚìÞ(.w: À$¬;ÕøHuOú5ÏûýeÿÉÏümÿ’C®ÿÛ¿þ”GGü%¿ÿè˜å~ßü( ÿ„‡Tÿ¡3\ÿ¿Ö_ü‘YúTÏ¢}·û;Àšä?n»’öãý&Ñ·ÌøÜÜÜœgíYÿð–üCÿ¢aÿ•ûð£þ߈ôL?ò¿oþ°¾4XüG¥hwú«§ÝjžwÙžsnÈÞRo|˜åb8ÇnÿZê+ÇçÕüEª|^ð'öÿ…ÿ°ü¿í'ý>;Ÿ;6ÿ7Ün0½zîö¯` ¹vñ¢Éâ=WC°Ðu]BëKò~ÒðuEóSz`É*“Æ{vúWQ^?¯â-/â÷Žÿ°lœç&´?á-ø‡ÿDÃÿ+öÿáGü%¿ÿè˜å~ßü(bÿÅ÷zfsyár;[XžiŸÌ³;QA,p.2pé[šN¥³£Xê–ë"Á{oÄk ‚º†à‘œS^gâÏøîãÁºä7Ÿ¾Ék&Ÿp“\mÁ'”†6 û@Ë`dàuÅwÿ’yá¯ûZÿ襠‚°üOâ‹o [ØKqiwt÷÷±ØÁ¨MÍ+ƒ´ì —Ïq[•æÿ¦¸·Ó¼'5¯Úî£ñ-›Ãoæü×B©¸ð¹8=3@Gü$:§ý šçýþ²ÿ䊧ªÞK®iwf§à=fæÎá6KËe†øAÁr`ŠËÿ„·âýü¯Ûÿ…ð–üCÿ¢aÿ•ûð  šO ik¦è¿õ›K@åö,ölYRÌ×$±è2IàÐ ÐÓ|a÷Š›Ã—F¥§êÈߺòYL[Âdä~wžÇÛ8ð–üCÿ¢aÿ•ûð¬ ê:ΧñÖIµÍûé|4QmþØ—;“í ‡Üƒ$‘ozõŠ(¢€<âïã>…iª_Ødë3Icu%¬¯pí.ƒÒƒÂ¢ÿ…Û¢Ð\ÿ¾-ÿøõyaðÏŠµ¯x²çCоßjºõäm/Úâ‹%pä„ûÔÿð|Cÿ¡Cÿ*VÿüUc'Rú#Ñ¥OàI´úÿV;ÏøZ>ÿ„‡ûþ½sûOìŸbóÿsþ§~ý»|ý¿{œã>õnã㯇í içѵÄq“åÀq“ŽÓWœÂñþ…ü©[ÿñU‹âÏøËKðÍåæ­áϱØÇ³ÌŸíÐÉ·. |ªÄœ’´)U¾¨u)`T[„ÝúV>²¢Š+cÍ (¢€ (¢€9ÿÿÉ<ñ/ý‚®¿ôSWÏ:?‚Õæy~Zíߎ7cǬêR…Ei«˜<“–\­ï¢šgŸÂñþ…ü©[ÿñTÂñþ…ü©[ÿñUë?ñwÿêFÿɺ?âïÿÔÿ“uÔè/âÏCýdÍ?çïþKò<ÏÂñ…ñ{Â_ÛúGööÏ'ý&9wâÝ·}ÂqŒ¯_Zú2¼~øL?áoxþ¿ì?ùˆ}›û+ÎÿŸ›™ÿÆ=ëØ+¢Œ#Ë+‰«‰ªëVw“ÝþŠ(ª0 (¢€ (¢€ óÿ¿òHußûwÿÒˆëÐ+Ïþ6ÿÉ!×íßÿJ# þÃñwý ù=øÑý‡âïúÿòz/ñ¯Pÿ„Gâ‡üöðýõsÿÄÑÿÅùíàÿûêçÿ‰¯šú¾cÿ>á÷Ëÿ’>³ýb­ÿ?¥ÿ€Çÿ‘<¿ûÅßô-ÿäô_ã]oÁëMFÏâÅÜzØç:²Ç欙_>>r¼ujèÿáø¡ÿ=¼ÿ}\ÿñ5‚´í{LøÛ4>!m5®ÛÃŒètòæ=ŸiP3¼gvC{cÙ¥Œ[Ö„Rònÿ‹ga›ÔÅPöR¨åäÔWä‘ìtQE{'€|§}¦ë·ž-ñTšf‘öȹv­'ÚR<7˜xÃô#Ÿzoö‹¿è[ÿÉè¿ÆºÍMñ}æ¹âù4ÐÅ ñâ¿ÛÌ»÷îÆÁŒ`¯¾s[?Ø?¿ç§„ÿ;Ÿð¯ÇfÏ8^>­ßç©ôø<â½§’I-­Ö'aø»þ…¿üž‹üj®§£ø¢-*òKùP,ÒIöÈ›j…98'µzöįùéá?Îçü+3Äz/ļ1«Izþ6‰g3Mä›ûÛr1œgã5…,Îõ"¯OuÖ_æmS;ÄJ.¬µ_ËþDõÏÉ<ð×ý‚­ôR×A_.]j?WºU½¥¶¥=” f4¨ÛÊDM¬^=Ò!a‚rFrÃȧ®ÿÂßÿ„{Sÿ‘Sþ=%ÿÚþÓ÷ú¯úiýß|WÛ$h|mÿ’C®ÿÛ¿þ”GYð´<]ÿDëÿ+qñåΣñfçáæ¢¾!¶»o¢Eo3j$R¡Y#(Ã;erHQ¸îsg‘‘è_Ø?¿ç§„ÿ;Ÿð¯6Ì'ƒääqW¿Å~–ÚÞ§VßÚßäjÿÂÐñwý¯ü­ÅÿÄQÿ CÅßôN¿ò·ÿY_Ø?¿ç§„ÿ;Ÿð£ûâWüôðŸçsþã¬ÿžŸÞÿÌêöNòü?ÈH|Q«ø“â÷ÿµ|;ýö·ùé©qæî·çî·G×>ÕíUáze‡‰l¾/x/þ&ÒNÿ·yÙæNÖçvíÿUÆ=ëÝ+érüD±xÕ“M»í¶í£ͨmæãðjþ"Òþ/xïûÂÿÛžgöþŸ·“‹—9n6û×°W}£Å°|^ñÏü"ÃD;¾ÁöíO7þ}þ]ž_ü 
9ö­ë×§B›©UÚ+©šM»#¥ÿ„·âýü¯Ûÿ…ð–üCÿ¢aÿ•ûðªŸÚ¿»à¯ÊëühþÐøµýß~W_ã^öæ]ÿ?‘~Ê}Š^,ñ?Žî<®Cyðëì–²i÷ 5ÇöÜyHc`Ï´ ¶N\WqàOù'žÿ°U¯þŠZóï_|Okk¨/„>Äl'g>g—å¶í™ãv3ŒñšôÉ<ð×ý‚­ôR×fCœ¨II.ÄÊ.;y¿Æ)®-ôï Ígkö»¨üKfðÛù‚?5Àªn<.NOLפW›übûgöw„ÿ³¼·ÂKgö´gËó1&ÝøçnqœsŠé$±ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿáGü]ÿú‘¿ònø»ÿõ#äÝð–üCÿ¢aÿ•ûð¬ ê:ΧñÖIµÍûé|4QmþØ—;“í ‡Üƒ$‘ozØÿ‹¿ÿR7þMÖ?†ÿá$ÿ…ë'ü%Ù_nÿ„hìþÌó<¿/í#ó9ÝÞØÅzÅQ@/àýsÅZf£ãt?mZ·‰o]®?´â¶ÚùPSkŒœ}ý«¨ÿ„·âýü¯Ûÿ…rþÿ„óûGÆ?ð‹ÿÂ9öøIowÿiùþg™•Î<¾6ão¾s]Gü]ÿú‘¿òn€øK~!ÿÑ0ÿÊý¿øWñKÄ^2¾øq«[jÞþ˱'̼þ׆/!"Œœ3žÕØÅßÿ©ÿ&ëø¥ÿ þÆ­ý¿ÿ§ögî|ï°}£Îÿ\›vïù~öÜç¶hÜ(¢Š(¢Š(¢Šçüwÿ$óÄ¿ö ºÿÑM\¯ƒâñùðN‚lï<4¶§N·òDÖ“— å®ÝÄH8Æpu^;ÿ’yâ_û]覮Â~ñÝǃt9¬þ"ý’ÖM>Ýá·þÄ‚O) jU7–ÀÀÉëŠê<ŸˆÿóýáOü¸ÿã´y?ÿçûŸøqÿÇk?þ/ˆôSÿòoþ4Â%ñþŠþP-ÿÆ€1õñ"|^ðü$éRçûCÈþφHñþŽ7nÞÍŸáÆ1Þ½b¼~}#ÄZ_Åïoø£ûsÌþÐòÐ#¶òqoó}Âwg+צßzö (¢Š(¢Š(¢Š+Ïþ6ÿÉ!×íßÿJ#¯@¯?øÛÿ$‡]ÿ·ý(Ž€4<ŸˆÿóýáOü¸ÿã´y?ÿçûŸøqÿÇk?þ/ˆôSÿòoþ4Â%ñþŠþP-ÿÆ€4<ŸˆÿóýáOü¸ÿãµÎè‹­¯Ç—ôº|·_ðŒ†Â'DÙö¡Œ‡f9Î{úV—ü"_ÿè§ÿåßükÃzv³¦|u’s^þÚºo [±¥¶ÔûH6¡ÁÁçßÚ€=bŠ( ðeŠnoübúÖ°ñ5èe½‚W}ù\QÀÆ1ÛÖºŸìŸˆ_ôðÇþÜñÊ伡ø«SÔ|c6‡ãì[Uñ-ê5¿ödW;Ÿ*Kîs‘@Ç·½uð‰|Cÿ¢Ÿÿ” ñ¯:®Q­7R¥$ÛÝ–ªI+&Ký“ñ þ‚ÿÀ;þ9Xþ-Ó"ý®Ö=>áæ·þÄ‚?5lY7•ÈÈÈ隈ä¹|Z’¤®ƒÚO¹ÜxþI熿ìkÿ¢–º çü ÿ$óÃ_ö µÿÑK]z„ñ·þI»ÿnÿúQrßð“øãûþÿÀiÿøåu?äë¿öïÿ¥Ö/ü*_ÑBÿÊ,_ü]xù®],g'*‹µþ/;m£ìpcV=òýNj;Þÿ‡GæPÿ„ŸÇßð÷þOÿÇ(ÿ„ŸÇßð÷þOÿÇ*ÿü*_ÑBÿÊ,_ü]ð©|QÿE ÿ(±ñu䫵?–wü‡—>ÿŸÑþ¿íÓMÔõÍKâ÷‚¿¶[N>_Û¼¯±ÄéÖÜçvælôÆ;׺W‰[xKTð·Åïix‡û_í?oòÿЖßÊÛoÏÝ'vw¦=ëÛkèðw†ÃÆ“¶—ÛmÛ=l2®©/¬´çÕ­·Ó·@¯ Ôµ=sMø½ã_ìfÓ‡™ö7í‘;ô·Ûµ—NsžÕî•ò×Äí_TÒ¾/x—û6óìÞgÙ|ÏÝ+îźcïާó­ëeÕ³(<%©J['¶šþHX·]Qo$§Ñ½·ô}óþÃßø ?ÿ£þÃßø ?ÿ¯ÿ„¿ÅôÿÉX¿øš?á/ñGýòV/þ&¸âçóâz<_iÏèýÏÿ‘=CÄ~"ñŒþÕ¢º} ìïe2Ëå[ÌiBÒd tȯ[ð'ü“Ï Ø*×ÿE-|›yâŸÜXÜA>«¾#dtûx«Äš6‰ª[ø†Òî ôÈ5¿’UkueÜ"\7ާåúðù+&Nž* .Z«[ô=<±Ž/ësR}-ÿ ¦ëÍ>4Ïqk£xbâÓÊûL^#µx¼ÐJo !€ ‘œgU/ü:ñö¿á˽2oˆ_Ç>ÌÛO¥GnµÕ¹‘72ãàã yWŠ~xáß„¬¯o|A«ÇåXÚ—xO-ˆ˜îÀßòÆÞ˜çµtrN~ì7{z³¿+åÜôÿøMü}ëá¯üŸÿŽÑÿ ¿½|5ÿ€³ÿñÚñ/øKüQÿAŸü•‹ÿ‰£þÿÐgÿ%bÿâjÿÕ^)í¾'Î[8ÿŸ°ûŸÿ"{oü&þ>õð×þÏÿÇj/꺾¯ñ¶kkì?i_2/Ø£tMŸiR2˜ç$÷ô¯ÿ„¿ÅôÿÉX¿øšïþj7úŸÅ ÉõŸ´Lº+ -S çFq…¹4žEž`¿}ŽK“É­úluàVaí¿Ú*EDzßòGÑ”QEIíCàhüZ×¾2: Ή¯ü$÷»Åý¼®ûò¹ÁGQŒc·­uÞOÄùþð§þÜñÚðÿø«Ã>.ñU–‡¬}ŽÕõ˹Z?³E&\ÈA9u'¢Ž=©Ÿð¶þ!ÿÐÍÿ’ÿüEzô2,½5V;Åí¬ÌÍÕ‚vlúÉøÿ?ÞÿÀ;þ;\_ňül¿ 
µs«Ýø~KÜù«im2J|˜ÚZB8ÎGL×™ÂÛø‡ÿC7þH[ÿñ™âˆ¾2×t+7V×~Óc6ß2²B›°Á‡* Ž@<ÕUx1¥ T;$›zÇeóV Ù3ìJ(¢¼c@¢Š(¢Š¯g}o¨@ÓZÉæF²É ;HÃÆíŽ}X{ãŽ(ÇòO—ö†[ÿ@çw×0?óäƒÀÒêü^ð¯ÛµOQÇÚö}ºé¦òÿÑÛ;sÓ*¥:•\éG–=¾G™ˆœ'QÊš²ìQEs˜…Q@Q@yÿÆßù$:ïý»ÿéDuèâŸ Ùø»Ã—zü“Çku³{ÀÀ8Úêã‚:¨í@¿ü*ÏúŸ|sÿƒþÂøUŸõ>øçÿý…ð«?ê}ñÏþ?û Ëñ€¦Ð¼?y©[ø«â§< vvš©i%b@„ã'$ààpqŠÔÿ…YÿSïŽðqÿØV?†ôøG>:Égý¯ªê›ü4eóµ;Ÿ>EÍÈC`axÎ=Iõ¬ÿ øVÿ]¼Ôôí__ñæ©iþSIñ\+G mŒPsò6F8ãžxî<7ðòÏÞ!“\þÛ×5Kç´6{õ;±>ØË‡À;A\rxæ€; (¢€<_Áþ ÿ„Qñçü$Þ#Òöx–ö/'L¿ò#l;Ší9nqŸ@=+¨ÿ…YÿSïŽðqÿØQ'Â{?íBòÏÅ^+Óþßw%äÐØê(üÇ9b'Ðs“€9£þgýO¾9ÿÁÇÿa@ü*ÏúŸ|sÿƒþ±üYðÛì> ×/?á5ñ•Ç‘§ÜKäÜj»ã“llvºíåN0Gq\¿ôý[—^¥ã›6‘ÚþçÅqZ¤ŒÁÎÈÃ!.À!ùGÍÇÝÁRݤ áÖthžãÆøçÿý…gëž³ðö‡{«ßü@ñÊZÚDÒ¾5 c¢®TÄàžI¹ÿ‡žÕ“ðÊÏLñŸ®Mâ?ê—VgÙÓR¾¢ùˆQ¸)‘{ÐzWq@x×ý+Ç¿¼oý§w¨[ý‹ì_ØäTݾßÛ•³÷:w¯n®Vøeg©øP×!ñˆô»«ÿ/í ¦ß¼´¼ÉÀÉê}j£)AóEÙÍÿÃ;øWþ‚þ ÿÀ˜¿øÝðÎþÿ ¿ˆ?ð&/þ7[Ÿð«?ê}ñÏþ?û òjwÄ |#g«xçPºà /ÿ 8‹ÍyU ŒH¸Ÿ~+_­Wþw÷±r®ÇOâoÑü)¬jpjšãÍgc5Äk%ÄeY‘ @ŒdsÍz‡?äžxkþÁV¿ú)k›Ÿá$7VòÛÜx߯³A*’95`ÊêF ¦#ŒWq¤é°èÚ5Ž—nÒ4VñÛÆÒXª(PNÀôœêNzÍ·ê $\¯'ý bø/G…‹“Z…ISÈÌrŽ+Ö+œñŸƒ4ÿi0iÚÅå¼P\­Ê=£ª>õVQË)ãæ?¥(IFI¾…E¤Ó{.Â-iÿ?w¿÷ð…ð‹ZÏÝïýüá^ãÿ Ãÿô0x›ÿcÿãuÍøçá¿„| á©5‹ísÅ3âxñšR Ÿ+ 0¤’zp À?Kýµ„ÿŸoïÿ‚zÿ[Àϓ̿á´ÿŸ»ßûø?»ς6é߯!ŽIdVÑ]³#dÿ¯Œz{V>è^5ð¶º×Þ)°»ªÄÚ”R Åw+ÁÆA€rÁ>ƒá…º?ƒ5Éu{-GUº¹’ØÛ¶Ì’…•¸Âœ¨ïÜ×;2¡ˆ£ìéÁ§êsâkág R§ÊûÅQ^!çŸ"kz,ñ_‡nôMGÅšBÚÝÐêp«¬I#ªŽÕ‹áûÃÅRx[ø•¡êÚ‡özéñ´rAl¢=ûÉ`$l±lr0<1áñ"ÿ¨oþ9GüH¿êÿŽV~ßÈëþÌÿ§ˆú²ÇÅžÔï#³°ñ•wu&vCìr;`p äà د—~}ƒþ÷†~ÅöoùzßämÿŸwÆqø×ÔU¬eÌ®pW¥ìª8^ö (¢¨È(¢Š(¢Š*½õýž™g%åýÜ–±ã|ÓÈ#EÉe$øÕŠóÿ¿òHußûwÿÒˆè ÿ„ïÁÿô5èø1‡ÿŠªz¯<=u¥ÜA¦xãÃö7’&Ø®^ê„GûÛ7€N3Œœgp~gÿ‰ýCñÊ?âEÿPßür°öþG©ý™ÿOïž Õ4 Ú]A¨|GÐõc;¬žsÝ"J\.Ögv™Ë’lÀã§i¦ø—AÖnßKÖôÛéÕ ´v·I+È!I8É>â¾Oÿ‰ýCñÊîþ }“þ•çØü/ûóäãóãôïÒª9¬cˆÁ{só¦}EV§ ‡?<+kq-½Ç‰thg‰ÊI—ñ+#‚-Aãü'~ÿ¡¯CÿÁŒ?üUy¿ƒÿáþÑñü%ðŽ}»þ[ÝŸÚ~G™ååq3¹Ýíœ×Qÿƒþ¤oü”  ¿Ï¡ø¹gµü1›,A~ËuµÉŠL2™#s"•l7’àŒâºËø>ÇN¶³ÿ„ËJ¸ò"H¼ëR’M  ÎÛ¹cŒ“ÜÖ?üZú‘¿òR±üYÿ ³þÝsû;þß·gÜ}Ÿìÿeó<Ï-¶ìÇ;³ŒcœÐªA<7Vñ\[ËÐJã’6 ®¤dGÎjJçü ÿ$óÃ_ö µÿÑK]W¾¿³Ó,伿»‚ÒÖã' i¬Ç oC•ÉVìpÃÝAêdø"Oø/ûcoôÛÿíÖºýþ§òÁèd;ŸûÏÁl Ž*çüZú‘¿òRø´õ#ä¥u>,ðÞ§y‡ˆ4«»©3²/c‘Û“…'øVÅxüÿð‡ÿÂÞð'ü"ŸØóûOöW“ÿ>ÿ.ÿ/þŒû×°PX÷Þ,ðÞ™y%ÿˆ4«K¨ñ¾ïc× •'# ƒøÖÅxüð‡ÿÂÞñßü%Øóû7ö¯“ÿ>ÿ6Ï3þœ{P 
Âwàÿúô?üÃÿÅWø£Lð¿Šo5¹ø‰áˆôÛûˆfš5†Óí F\ o™IÈäué?âÐÔÿ’”Å ÿ©ÿ%( ÿ„ïÁÿô5èø1‡ÿŠ­È'†êÞ+‹ycš PÓ¢Š+cÍ (¢€ (¢€9ÿÿÉ<ñ/ý‚®¿ôSWÉV—º YÀ³$hCæÜ“»óŠú×ÇòOc!ÏéŽG=qØxkÀ>ð…Ä÷“¬ó I$2<´àb@Î ÀÏAMñÃï x§U‡SÖ´¥»»†5‰¦‘@@Å€*¬òǨï^ùó'Ëš–¯ Þø›_½vŠX®µK‰à‘à$´lä©äduèy¨>ßáßî[ÿà1ÿâk¾Ò¼Oáß k)Ó.îc²Ù®Ý˜`ŽÝö¬a‚¨T€Ò1íZ¿ð²¼#ÿAoü–—ÿ‰¯›Åâ«Æ¼£jû¦ìÿòV}f 3öXxC÷z.±×çï#Ë>ßáßî[ÿà1ÿâj+»ÝìçXR4ÆÁ1nAÝŽ9ÅzÇü,¯ÿÐ[ÿ%¥ÿâk?^øƒá{ßêv¶úžùç´–8×ìòÌÈ@+ަ±§‹Ä9¤ðóZ÷ü‰½\ÙÊ ~ëoåÿímð'ü“Ï Ø*×ÿE-tÏøþI熿ìkÿ¢–º ú£ã?øÛÿ$‡]ÿ·ý(޾oû‡¹oÿ€Çÿ‰¯¤>6ÿÉ!×íßÿJ#® þW„è-ÿ’Òÿñ5äæ•ªSää§)ÞûtÛ}íäØÏ«sü:ÛâWï¶«æygÛü;ýËü?üMoðï÷-ÿðÿñ5êð²¼#ÿAoü–—ÿ‰£þW„è-ÿ’Òÿñ5ä}sÿ@óûßÿ"{ŸÚïþà?ý±Ê|/ŸNŸâ÷†ÿ³Ö1µoÙÏùw|vï_TW€è%ÒÛæ~íÓnës¼z÷êú, å<ÕàÒí´a¨Ie2Z˜ôVF”!0ÞPÚwcœŒWªxþI熿ìkÿ¢–¸|_ð&§àÝrÂÏ]ó.®´ûˆaO²N7;FÁFJ`d‘Ö»É<ð×ý‚­ôRÐA^gñ¦kmÃê‚3§Çâ;WºG½LAd/•ÁÜ6çŒצW›üb¾·Ó4ï ßÞIåÚÚø–ÎiŸi;QD…Ž'”ÿ —ÀÏùõÐÿðDßüføL¾Ï®‡ÿ‚&ÿã5ÐÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝsÿð™| ÿŸ]ÿMÿÆj?j~Õ~6M?ƒã´M=|8É µ´6ëæý¥IÊ•\¥yǧ¥tŸð»~ÐÃÿ’Wün±ü7âÅßd¿Ðï>×k†Œ,þSLJ ‘‡ôaùЬQEñ÷ˆ.t¨|mâ…¾XŒ§Y»#|[Žß0÷Á÷ªoðï÷-ÿðÿñ5íø‰á_ê>1°×5_²]Iâ[Ù•>Ï,™BT”R:©ü«¨ÿ…Ûðóþ†ü’¸ÿãuË<,g'''÷žî>­‡¥Q¥—Vÿ3æÿ·øwû–ÿø øš©ª]è²é²¥¢Â'8Û¶§¨Ï8ôÍ}7ÿ ·áçý ?ù%qÿÆëø¥ñKÁ¾#øq«i:N³ö‹éüŸ./²Ì›¶ÌŒydp äÑ$bÓæzy•[ˆkU§*n”2kH»ëÛSÜ(¢Šê<¢Š(¢Š(Ÿñßü“ÏÿØ*ëÿE5pþø“öèvð…xÊãÈÓíâó­ô­ñɶ5‘·r§¸®ãÇòOgÿ€¿ò<{›ø«þo‹Þÿ‰¹¥}ŸûCþB¶~G›ºßø99ÆÞ}2=kØ+Àô?ˆò|@ø½àÝú7öoØ~Ûô¯;ÌßnÙ\cg¿Z÷Êå«Jt¤áR-5Ñ«1¦ž¨(¢ŠÌaEPEP^ñ·þI»ÿnÿúQzyÿÆßù$:ïý»ÿéDtÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÑÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿá@ü-?ú¼sÿ‚þαü7¯ÿÂGñÖKÏìWKÙᣓ©Ûy6.AÜ'+Î3ê¥lÂ[ñþ‰‡þWíÿ±ü7¨ë:ŸÇY&×4ì[¥ðÑE·ûb\îO´‚r ’F=½èÖ(¢ŠðMÇ6ÞÖ|_e6‹­^³øŽöQ%¨‘, ,9ùz{ŠÜÿ…·cÿB·Š¿ð^¿ü]rÚm§Š®5ï>‡áŸíKQâÀÓ}¾(6¾á•Úüž0sïíZŸÙ¿ÿèCÿʽ¿ø×ÇãòG_:¾Æ÷{ó[ð<ÊØüÒG4SŠÙÝ™«ÿ nÇþ…oà½øºËñ/Å =CºŒøoİµÅ”Ñ &± ˆYËüžM'öoÄ?úÿò¯oþ5›â ?Çiá½Qï<ö{U³”Í7ö¤å¦Ã¹¶ƒ““Ö°¥8ÔŒ½†Í}¥þdG1ÎJT_â_æ{7?äžxkþÁV¿ú)k ®ÀŸòO<5ÿ`«_ýµÐWÛž±çÿäë¿öïÿ¥Öü-»ú¼Uÿ‚õÿâëãoü’wþÝÿô¢:âÿ³~!ÿЇÿ•{ñ¯ ;˾¹ìÿwÍkõµ¯oò8ñxœm }Vš÷»JÝ·~¦¯ü-»ú¼Uÿ‚õÿâèÿ…·cÿB·Š¿ð^¿ü]efüCÿ¡ÿ*öÿãGöoÄ?úÿò¯oþ5àÿ«Ïþ|äëüÎ?í,çþ×þ¿ÌÂñwÅHàñ_†µëMS‰ôÁwˆµ(¼!–0ƒÎ:ŸÀwÈé|ñËRÖíïTð¦¥}<.¥_@³i•UáÕŸ*r§<óÀÛÍÛÄVÿ¼ý¿ ÿenûw“þ™þgú9Ý÷:cåë×>ÕîV¶v>Øí ·óåiæòc æHßyÛXàdžM}fY‡ú¶4¹ym}/~­îzXz•ªÓS¯Y=ÖÿÖ‡…ø«ãÞ½¢ø–( ð´–z~Äs¯Ãu2äî# 
„AÃr¹çîŠøšŒüO­ÞøkXWÕÉÖÚÎ<Ä« Xœ2ÊqÈ?‰úìÚ?Ú?dƒíÞW‘öŸ,yž^wlÝ×nyÇL×Î>;ñ<žø½â­šÚüÿ²g÷Þ^ݶëìsߥiŽÁTÇa凧7+{«wfŸéqb*Ö£I΄y¤¶OOëC·ÿ…·cÿB·Š¿ð^¿ü]ð¶ìèVñWþ ×ÿ‹¯1ÿ…qÿ@üœüEð³®?èÿ“ƒÿˆ¯ÿR±ÿô?½žgö®oÿ>#ÿ/ó;¯üP³Ô<+¬Y/†üK \YM’k¨…Œ±ßÀäשøþI熿ìkÿ¢–¾hÔþ"O}¤ÞZËÀñûX;w)ÆÞz×Òþÿ’yá¯ûZÿ襯g+Ê+e”+QtÜìúøLN+ñ0Qkk4ÿ&΂¼ÏãMÚéú7†/Z)f[ÚÊc…w;…Yw' ôÊò¯wFÇÂZØÌ0kJv7mŽSŒöé^H:pJí«Xé“j-Çr¯ü-»ú¼Uÿ‚õÿâèÿ…·cÿB·Š¿ð^¿ü]yü,ëúäàÿâ(ÿ…qÿ@üœüE|¯ú•ÿ ýìñÿµsùñü ™éßð¶ìèVñWþ ×ÿ‹¨üâ(¼Mñ²kØl5 %O4F;èDnH¹S<|Ý}y¯ü,ëúäàÿâ+¬ø?®¿ˆ~,]ݽŸÙJhoÏ7~q¢Š+ß=#ÅüãOøG5ÙÿÂ3â=S‰oeó´Ë>5ÉQ´¶á†ã8ô#ÖºøZõ!xçÿÿýy¦Ÿñf_kþ.ÒÓ@þÐø‚òãÍû`‹p»q±¿¹œç½iÿÃG\Лÿ•AÿÆ«®~.¤TáJM>ª.ß‘.q[³¸ÿ…§ÿRŽðOÿÙ×ñKÇÿÛ5m;þ/ØyÞOúMþ›åB˜™æmÇÆ¹_þ:ãþ„ßüªþ5\5Mâßßèm៱­×—™þÞ$Ù¶E»åŒçn:÷§,»¹J”’_Ýäñî}5EWAEPEP?ã¿ù'ž%ÿ°U×þŠjøþßNÕÚ'ŽÇr2­ç(ÈÇ­}‘âËOÁºå…œ~eÕÖŸq )¸ ÎѰQ“ÀÉ#­|ûkàˆvÖÁÿŽï*5MßÚVã8ÏÞ¯k%ÆG RR•G ®‰;ýñ‘Ó†¥…¨ÚÄÉ¥ÒßðÌóïìÍ_þÿù?ÆìÍ_þÿù?ƽïÂ><°²žòëÂk¼´²ÈÚ¾d“óv²tKø’âK}G´¼ž$ÞñÇ«[î œg²FHí‘ê+è¿·)ÐT¿ðÿò³¯êyGó¿Çÿ‘ð¢ÖîÛâ÷‡~ÕoäîûNßœ6Ñß=? ú¶¼Àþñ•§Ä}VÕ¼?ö?´y’ý¶q¾Q¶zàpZ÷ªù,κ¯ŠHÏ™;jÕ¯¢ò_‘ÁZa7ñéýz…Q\AEPEP^ñ·þI»ÿnÿúQzqÿ´MGÄ5m'I·ûEôþO—õMÛfF<±p äÐ ÿ ×Wÿ¡ÿ*ÉÿÆèÿ…ë«ÿÐÿ•dÿãuÊÿ ñþ„ÿü©ÛÿñUWQðÇ´>kýGÃ1ZÚB3$Òê¶ê«Î;»’ɾƒêù7üý—ÝÿڞϱË?çãþ¿íÓ´ÿ…ë«ÿÐÿ•dÿãtÿx²çÆ?¦Ô.´ŸìÇí‡ímÀ\+nÜÇÞÆ=«Ï4=7Å%Ži4] Úô@@”C«[’™é‘»#88=ð} zÂÏx«Gñíί®h¿Ùö­¥µ²·Ú¢—/æ£ò1=íÚ¹q”rèR¾£rìÿý”sâià£Nô&ܼÿá‘ìôQEy'œx¿ƒõÏišŒaÐüýµjÞ%½v¸þÓŠÛkåAM®2p9÷ö®£þ߈ôL?ò¿oþ¢øGQñ$6~þÔµ¿Öî¯á¸þÖ‚ ¤„ùNOEœuéÅlÂ[ñþ‰‡þWíÿ€øK~!ÿÑ0ÿÊý¿øV?‹´ÚxþI熿ìkÿ¢–º ÇðÆ™àÝÂò?.ê×O·†dÜ×XÔ0ÈààƒÒ¶(Ïþ6ÿÉ!×íßÿJ#£þ߈ôL?ò¿oþ¡ñKDÔ|GðãVÒt›´_OäùqoTݶdcËžMgÿÂ[ñþ‰‡þWíÿ€øK~!ÿÑ0ÿÊý¿øQÿ oÄ?ú&ù_·ÿ ŽxöÖÞ[‹†±ÃH^I$ñ ²ª($’0æ³|?ñOÅ~)²šóEøz·vðÎÖï"ë¨$ È3ÃGõ  ³êþ"Õ>/xûÂÿØ~_ö‡“þŸÏ››î·^½w{W°W—ÅŒ¼Gñº¶­àÿìk'í~d¿ÚpÜnóaÚ8\È€zöÅz…ò—Å{[»Ÿ‹Þ"û-¿·ìÛ¾p¸ÿGLuükêÚð_xÆWõÍ[IðÿÛ¬o>ÏåËöØbÎÈUO Ùë‘È+¿,®¨b¡RSåJú¥{hüŸäkFg5îÑëýzž=ý™«ÿÐ?ÿ#'øÑý™«ÿÐ?ÿ#'ø×£ÿ ñþ„ÿü©ÛÿñUÍê“k&¥.©iv–×ã̆MR ËgŸBã_[ý¹Kþ‚¥ÿ€Çÿ•ÿSÊ?þ?ü‰ËÜiÚ¢[JòXíEBY¼å8äõ¯°< ÿ$óÃ_ö µÿÑK^ uàˆw6“AÿŽß66MßÚVçÏÞ¯ ¼'cq¦x7C°¼ËºµÓíá™7µÖ5 288 ô¯Î±‘ÅTŒ£QÎ˪Jßtbrbiai´°Òmu¿ü26+É?hewð6”‘®çmb«œdùRàW­×žüaðî³â_ i¶úÛn­µH®Z/9#ù$åÈXƼšRå©^ÖhçŠM¥-™¿³5úÿädÿ?³5úÿädÿôøA¾!ÿПÿ•;þ*ªê>ñ¶‘§Í¨øf+[HFdš]VÝUyÀçwr@¹ WÜnRÿ 
©à1ÿåg§õ<£ùßãÿÈœöf¯ÿ@ÿüŒŸã^“𠛉—©u”çG…Ü:.x¬ÝBñˆl~Û¤xv Ûmå ê¶ì üÜAÁìAïZ6ø«á}Zë[Òü/Wص‘-q ìœ6õU~X:‚:äó3lΞ# 鯻ž«F’ü¢¿3øl¾œ9°òn^ðÈúNŠñ?x§âô3j1k>»Ö™|¯,Ï$ ÞÎÓ° 3Æq¸÷¦ëþ/øÀ¾)·m7ÂÙÛ%¶ç±.î[,7)•ùòƒŒùc„òOÙß\xçÅkkæ Ö.Áo1WŸ4ñÍfÿfjÿôÿÈÉþ5èkàO‰/}¨ÞÜxJ7žúòK¹<½BUg9 äã>õ'ü ßÿèOÿÊ¿ÿ_c€ÍéÑÃB›ÄJ6[r§oü‘þg¡K –Î UfÔºïþGœfjÿôÿÈÉþ5ZþÇQ†ÊI'³òâËyªqÈì+¦»Ôu Ùìî¬,⸂FŠXÛUƒ(êpA籿©|9ø‡¨éòÚÿÂ%åù˜ù¿´mÎ0Aé»Ú¯œR©BpX™;¦­ÊµÓo~h©á2¸Åºs|Ëm÷éÐú~Š(¯‹<ТŠ(¯3Ñ5ëÝB–1&¹'öƤ“™RÐ2Ü·ZÚÈ`ÛðÁ~ÿËòà/¦Q@GâmG_Ö¤Òu ß]®™e{c5ºCµw]›ÄRÙ™¢”,˜Ô4J JĪ^¹Xþ,¾¸Ó<®_ÙÉåÝZé÷BûAÚë8<:×ÏVßþ!ÜZÃ?ü%»|ÄWÇöm¹ÆF»S)¨îmGR³jš½qø— _x£á毣é‹^N‘´Hí´9Iöç $)8#$k—ð\—Úçĵ×G…µ-OµðâiÆË%R_88H‡( 1È\€|ïþ߈ô7ÿå6ßÿ‰£þ߈ô7ÿå6ßÿ‰¨öÐîtgb—ñ_æ}1Ex|uã+߈ú“«xƒí¶7ž™Øá;!fªç¨ƒÚ½ò´RRWG%Jr¥' «4QE2Š( Š( Š+ø¥­ê>øq«jÚMÇÙï ò|¹v+íÝ2)á‚G"€; ÇñSÛÅá}F[­#ûb4ˆ¸ÓþÎfûCƒ”M¡[«mçoSÀ¯Ÿÿá=ø‡ÿCþSmÿøš?á=ø‡ÿCþSmÿøšËÛC¹Ýý‰þ_Å™ë_ ¯îõyumSVѵ; fà[ý©îìÖ ¬8C; w’Ç—ï«æøO~!ÿÐßÿ”Ûþ&ºÿ…^2ñVµãË'\Ö¾ßjºc\ªý–(°âTPrŠB{÷ªHÉÙ3*¸:Ô£Í8Ùz£Úh¢Š³˜(¯œ5oˆ>;>(×í¬üMök[MNâÚ¾ÁíDrd®OúUoøO~!ÿÐßÿ”Ûþ&³u`›:áÄTŠ”c£ó_æzÅkˋȵ- ø¯Î–ÑKE„:]² År|_•<œ±À-é\Gáí2;Ë8,®–Ò!5­¸8\ ܈ (9ð:×ÎÿðžüCÿ¡¿ÿ)¶ÿüMEsñ⽬ÓÿÂ[»ËF|fÛŒàgû´{hw)åø”®ãø¯ó>¢±ü'}q©ø7C¿¼“̺ºÓíæ™ö¹Ú5,p8$ô­ŠÐâ +ø¥­ê>øq«jÚMÇÙï ò|¹v+íÝ2)á‚G"³ÿáø‡ÿE?ÿ(ÿã@?Ñõ½Âï¥èob’\J‚ä^Ë,hð—MÑ|ß6ŸUfÏü$°ñ%ü%Ûö[ùúÜÓ§—‰æHØó7õ„á6§æÏj±ÿ—Ä?ú)ÿù@·ÿ?áø‡ÿE?ÿ(ÿã@Ey|RøËßü+¤êÞ0þÙ±Õ¾×æEý™ ¾ß*ÕÉ~!I,v-÷UOS»v .JåÔzü"_ÿè§ÿåßühÿ„Kâýÿü [ÿzåúî‹ñDðö§«ÂÉó¾Ãi-Ï•ý…n»ö!m¹ÉÆqŒà×qá;ëOÁºýäžeÕÖŸo4Ï´ ÎÑ©cÀÉ'¥lQEyçÆ/ë>ð¾s¡ß}ŠêãSŠÙ¥ò’O‘’BFÕAü(M»#ÐëÏþ/ønëÄž´Kk9/£±Ô"¼¹´…¶Ë<*®®±ú¾ìq“€|Ÿþ߈ô7ÿå6ßÿ‰£þ߈ô7ÿå6ßÿ‰¬½´;¿ÙØŸåüWùž±ðzÃQÓ<5å¥õ¥œz„ÿÙ߯±Î¶¤‚¾bŽŒX¹9õãåÅz|Ïÿ ïÄ?úÿò›oÿÄ×_ð«Æ^*Ö¼ys¤ëš×ÛíWLk•_²ÅJŠQAèO~õQ©;&eWZ”y§/T{MQVsó†­ñÇgÅýµŸ‰¾Íki©Ü[CØ }¨ŽBŒ•ÉãŸJ­ÿ ïÄ?úÿò›oÿÄÖn¬³g\08Š‘RŒt~küÏdÕZÔþ)iZs$‰á»+#¨Êßf-ÍÎò‰9àÊÊ£®W8ÈR½¥|Ïÿ ïÄ?úÿò›oÿÄÕ]GâOÄ; .á,ó6cåþηÉ®ßzX7dÇ,"1r”t^küϨ¨¢ŠÐã (¢€ (¢€9ÿÿÉ<ñ/ý‚®¿ôSWÊvVš“X[´z®Ä1)Uû:£Œ÷¯«‡Ÿÿ>%ÿƒ%þg¢èzæ¯ã='N±×äµÔ'K“mtŠb12ÄÍÈrci# 'ƒÐúWþüDеMu›Å1éOrñ;Þ%¼wÂù¾rNd!Ô©c’@ÜXõÆk?Ã>¶ðßÅï}žòúçímÝö©CíÛnz` }ïÐWЕô*Ñ­B5"îüºŸ)™¸ëŸÿ>%ÿƒ%þgö=Wþƒù,µÝüŠâ/ŠW‹ssö‡þÅrË çÇÆ/ü+ 7þƒ:çþ/ÿZß 
´<=ñ†âÒÞæêá@iKÜÈ70Ž?và3uërFW~–<ÜÖXw†~Γ‹ºÕÍ¿Á¶{Q^éòçȚݽì¾4ñK[_ý?¶®ÁO%_'Ì<äÕ_±ê¿ôÿÉe®¾Zx‡Äž-»¸Ô5wM~ò –ÓRç$<óüªïü+ 7þƒ:çþ/ÿ_;ŠÍ)R­(9Y¯îŸ_ža¡ÏFMÛ~v¿ œØõ_ú ä²Ô¶š’Ø\4š®ô1eû:ÃŒö¯Cÿ…a¦ÿÐg\ÿÀ¥ÿâ*†·ðëO±Ð5´Õµ‡x-e•RK…*ÅTœ·‘ÅeO7£)¥Ïÿ’g„örµlþÜ¿Ì÷É<ð×ý‚­ôR×A^oá߈ÞÐ>xpj>"±IO¶…á†O:DqÈds.0AÈàñÖ½þúßLÓ®oï$òímby¦}¤íE±ÀäàÒ¾œøƒ‡øÛÿ$‡]ÿ·ý(޼›þÿˆô7ÿå6ßÿ‰®çâ—Ž|-â?„:²é:õÄÓù>]¿šfÛp™ýÓaÇžGNzW-ÿ ÃMÿ Î¹ÿKÿÄW—™c£…åæ•¯~—Úǵ”C .¬RçÚÚµmû5¹Ÿÿ ÿÄ?úÿò›oÿÄÑÿ ÿÄ?úÿò›oÿÄÖ‡ü+ 7þƒ:çþ/ÿGü+ 7þƒ:çþ/ÿ^göå?ùøÿðÙö9gý¿ü ÿ˜ÏxƒÄZïÅï oêÿÚOÛ<ŸôhâÙ›vÝ÷Îp½}+èÊùïÃ>¶ðßÅï}žòúçímÝö©CíÛnz` }ïÐWЕî૪ô#Q;Þþ]O™Ì#J8™*1厖W½´]B¼~ #ÄZ§Åïÿ`x£ûËþÏó¿Ð#¹ó³oòýò6ã Ó®ïjö ðmcL¸Ô~/xÏÈÖõ3Ëû³nÌ>fmÇÞÀçãÓ'Ö¯ˆ†“«Seÿ sR¥*³PŽìîáø‡ÿE?ÿ(ÿãGü"_ÿè§ÿåßükŒÿ„fÿþ‡àÕ¿ÂøFoÿèwñ‡þ [ü+ËÿX0]ßÜvfb;/¼Ûñg†´(Ål‰j³V”›^§¬QEFGȚݽì¾4ñK[_ý?¶®ÁO%_'Ì<äÕ_±ê¿ôÿÉe¯OðçÃ+Æš¿Œ5íOWµ–/ÞÀÊuD 0lPóóý…tð ¼=ÿAÿÿàdün¸êQ«)7+z#è°¹–_JŒaV”œ–íNKð¹âcÕè1ÿ’ËT5›mB=*vŸRó£s«Ÿ˜wïßð ¼=ÿAÿÿàdün¹_‰to øSÖm5}râ{+lwW(ñ¶éQN@@O O^¸¥”“r_r.¾i—N”¡ 2M¦“ç“ÖÝ®}EWióAEPEP?ã¿ù'ž%ÿ°U×þŠjáü'ðƒÀšŸƒt;ûÍ ÌººÓíæ™þ×8Üí–8’OJî©s£ BK(^èI­20” /•óFÓ»<`b€;OøR_?è^ÿÉÛþ9Gü)/‡Ÿô/äíÇÿ®þߟóõ¡ÿàõ¿øõð†ü ÿŸ­ÿ­ÿǨŸÁ>ðwÅï`iÿcûWö‡ûé$ݶßåûìqÍÓÖ½‚¼>ËFð6‘ñ{Á?ð…Ëc'›öïµý’üÜãÿ&rí·«úgŸJ÷ (¢Š(¢Š(¢Š+Ïþ6ÿÉ!×íßÿJ#¯@¯?øÛÿ$‡]ÿ·ý(Ž€2¿áOxþ€_ù7?ÿGü)ïÿÐ ÿ&çÿâê§ü!¿?çëCÿÁëñê?á øÿ?Zþ[ÿWÊ«øïú ŸãÿÉ›ûXÿ)oþ÷€ÿèÿ“sÿñuSÁþÒ|/ñ¾[ÓìÖÒxpÌÉæ;åÍÊ‚rÄžŠ?*?á øÿ?Zþ[ÿT~ Ó<)¥|lšÉhú{xqžCkvnÍûJƒ–,Ø;BñŸOZîËòœV·´«‰•EÙÞߌŸäLêFJÉXöJ(¢½Ó#¼7à? 
x£Vñ…ö³¦ý¦æ?Þ¯çȘ@À…`:±üë ÿ…=à?úäÜÿü]y|úo…o'øWà½;ÂzÍõ®åÜÛXÏ4Oö©Ž×X؃‚ø<Ö¸Ÿì/‡óÛNÿÁ‰ÿâêž­¢ø-úK9l Ò[ÈÐ…¿,w…;p7òsŽ+ž J©âj½WGÿÉ‘#Ãͨª×É™Ö音ö¯x7FÔ¬õ«ë;ëËH.f3"MÏfUQ°ŽHÁ,xú×qð3À—uÌ6zGÙ.¤‰Ò´Ï'”䯴ɆÁÁÁëŠê< ÿ$óÃ_ö µÿÑK]}ñêŸ8xÛàŽàï‡Ú×ö¥õæ§kåüØHámÓ*}ÌVþÿQŸjî¿áOxþ€_ù7?ÿZ¿äë¿öïÿ¥×a|;ÿžÚwþ Oÿ_=ŸS«?gìêÎü*÷Ûz?-ÎLVeOnx9svWµ¿áÏLÿ…=à?úäÜÿü]ð§¼ÿ@/ü›Ÿÿ‹¯3þÂøwÿ=´ïüŸþ.ì/‡óÛNÿÁ‰ÿâë罆'þ‚jýÏÿ“8ÿÖ\?üøŸþ¿Ìíàð~ƒá?‹Þþİû/Ú~ßç~ùßvÛ—ï1ÇÞ==kÚ«æÿØxvËâ÷„ÿ°Ý·ý³ÎònL½-Ûn~cŽ­_HWØåQ”pS““×Ynõ{êÿ3Ñ¡‰Ž*𭏧ÑïØ+Ågð~ƒâÏ‹Þ8þÛ°ûWÙ¾ÁäþùÓnë›î°ÏÝ}+Ú«æÿXxv÷â÷‹?·ÞÝv}Éó®L]m×v>až‹FkK 5 8½5ŽëU¶«ó ø˜áiºÒ‹’]úèwð§¼ÿ@/ü›Ÿÿ‹£þ÷€ÿèÿ“sÿñuæØ_ÿç¶ÿƒÿÅÑý…ðïþ{ißø1?ü]|w°ÄÿÐM_¹ÿògþ²áÿçÄÿðþgmâ…~ Ó¼'¬ßZèÞ]͵ŒóDÿj˜íuˆ8/ƒÈkÒü ÿ$óÃ_ö µÿÑK_9jÚ/"ѯ¤³–ÀÝ%¼[òÇxS·'8â½GGøÃà¿ xE³¸Ôd¹¿´Ó-K[HYÛw”™Ž#<Ü`Ž£ô¹*Bœý¥IO_´­÷{Ò;p¹…X/Ê»ÆqœŸ`kÍþ*xëÁþ4ðî“g§jÐÝ$zÕ³\Æêð‘IC7Îàw#¦Fq‘^Íkºr³¶Œé””“ètð§¼ÿ@/ü›Ÿÿ‹£þ÷€ÿèÿ“sÿñuæØ_ÿç¶ÿƒÿÅÑý…ðïþ{ißø1?ü]|°ÄÿÐM_¹ÿòg“þ²áÿçÄÿðþg¦žðý¿ònþ.ªx?ÃzO…þ7Ëc£Z}šÚO™<Ç|¹¹PNX“ÑGå^}ý…ðïþ{ißø1?ü]t¿ m4k?‹wQèm Z ÙŒ3™Fÿ=3ÎO8ŠõrzU£ŠNu§%g¤“KÿJ‘¾9£‹Ÿ²…)E÷i${åQ_Zz‹ø?áß…|]¨øÆÿ\Ò¾×u‰oaWûD±áR€êÇó®£þ—ÃÏú¿òvãÿŽWO¦øVóÅ^+“\’Õn†»xª&»1›øãpã$óRa|;ÿžÚwþ Oÿ^elÒ)¸8IÛ²ÿ‚xõóªTj:nœÝ»-?3ØáI|<ÿ¡{ÿ'n?øåqÿ¾ø7ß5m[IѾÏ}“åËö©Ÿné‘O ä Šã¿°¾ÿÏm;ÿ'ÿ‹¬i> ¶ðåÜÚL–Fùvyb;Ó#}õ ¸çŒö¥O6§Rj ÕÛoø$ÒÏ)T©jœÕÚZ¥m~g×4QEz‡´QEQEsþ;ÿ’yâ_û]覯Œâû7’›¼ÛFsŒ×Ùž;ÿ’yâ_û]覯“ìïü:–6ë:[ùÂ5›bNìsÎÞy¯{!ÄFI¹rê¾×èmG CjSQ·sýþ˜þ”¢ÓÒºí r×ÿOÿGö†?¹kÿ€§ÿ‰¯§þÑ¥ÿN¾ôtcÓÿ ˆýÿðM_ƒþWü-ïù[?åç;qÿ>òWÖ•òßÃ;2ãâ÷†¿³V!·í^g—Ïùw|vï_RWÅæÕ\dçYÛáÛe·õ¹ÉRŠ£'MKšÝPQEçQEQEçÿäë¿öïÿ¥× WŸümÿ’C®ÿÛ¿þ”G@üZú‘¿òRø´õ#ä¥|íý£áîZÿà)ÿâhþÑðÇ÷-ðÿñ5ídÒÿ ˆ}ëüÏWû:ŸüþÞ}ÿƒþ¤oü”¬ ÿÂ7ÿ ÖOøEÿ²¾ÃÿÑßý™åù~gÚFsåñ»}ñŠðïí r×ÿOÿ]÷Àù¬'ø£xÚxŒB4Wd{ï:>ر\ø¬:ùãZ2òOS F¡ÌªF^IŸEÑEæœ'‹ø?þ?íÿÂQÿçÛ¿á%½Ùý§äyž^Wó9ÛÞÙÍuñh?êFÿÉJàôᾕ¬ø¾C¦¾ Þ#½xÍÖšnÊÜÃl Á¸Ï¯­nÂeð3þ}t?ü7ÿ ƒþ-ýHßù)Xþ,ÿ…Yÿn¹ýÿoÛ¿³î>Ïö²ùžg–ÛvcÙÆ1Îj¿ü&_?ç×CÿÁñšËñ/‹> \øWWƒK¶Ñ†¡%”ÉjcÑYJP„ÃyCiÝŽr1@©àOù'žÿ°U¯þŠZè+Ÿð'ü“Ï 
Ø*×ÿE-tçÿäë¿öïÿ¥×ÊßèŸôÇô¯ª~6ÿÉ!×íßÿJ#®þ/Ÿóë¡ÿà‰¿øÍz¹fiõÝ©ó[~–¿ù™ÎÝO¿Ñ?ééGú'ý1ý+èŸøL¾Ï®‡ÿ‚&ÿã4Âeð3þ}t?ü7ÿ¯Wýgÿ¨xì<Ï+ø?åÂÞðÿ•³þ^s·óï%}i^e¬øWø½àŸøBâ±ÊûwÚþÉ`m³›“9EÝÑýqÏ­{…xìWÖ«Ê·*í¢ÛEcXÇ•X+俌Wü-ïy»?åÛ±ÿ>ñ×Ö•òßÄË2ßâ÷‰´–#»ì¾_™ÿùwLö8í[e5,d'+Y_âÛg¿õ¹­:*´•7.[õg¢ÓÒôOúcúWAý£áîZÿà)ÿâhþÑðÇ÷-ðÿñ5öŸÚ4¿é×ÞŽ¿ìzô¿þ ÎKöo%öù;¶œc¯¡4oþñƒ|=~é}eu6Ÿ ×iqþ¹Þ5$‘ `9ÏÝÇSí¼¿ðëØÜ, oçØ&-ˆ;±Ç;xæ¾°ð'ü“Ï Ø*×ÿE-|Æ}ˆjqåÑ}ŸÔç­…Ž¥©_±Ÿ­ü/ð~·£Ï§aØØyÛÒl-!ŠdÃù[aÆqƒìMx÷ÅO…Þð‚¬®ôÕºšî]Q#{›¹·0ŒÆä¦*ã(qž¼ãŠúF¼›ö‚h“ÁZCO$kP—ÈÈÛåËž;ñ^-j‘~hÉ.gcæÿôOúcúQþ‰ÿLJè?´|1ýË_ü?üMÚ>þå¯þŸþ&¿AþÑ¥ÿN¾ôvÿcÓÿ ˆýÿðNýþ˜þ•ê_³ï—ÿ "ûÊÛ·û"O»ÿ]¢®7ûGÃܵÿÀSÿÄ×}ð>k þ(Þ6ž#ÁÙÁ»Î¶lW“œã)Õ¸ÇÙî¾̪åð¡uUKÉ3èº(¢¾@æ>-ñ‘ÿ ï‰üß/wö½×ÞÆÖµd¢ÓÒº½vïGƒÆ¾)]Aa3jìðï;|Ãß¾j§ö†?¹kÿ€§ÿ‰¯¸Ë±Ôá…„_³Ñuzüθe«7Y+ô¹Ïÿ¢ÓÒ¡¹û?ÙÛg•»ŒmÆz×Mý£áîZÿà)ÿâjޝ{¡K¥Ì–i¸;v€©ûÃ<ãÓ5¦3JXz‘^ËX½ž»tóì9eTá5^.ÚÚûŸkÑ^uûJhé¨À–~¾–Äíó¦šdŽDçæÚƒpl –\ž8ë]ŇÅÿjzµ…ž»æ]]JŸdœnv (ÉL ’:×ÁœgqEP\žã‹'ÒM߈§´ÐäkÛ»xc½¹Š/1b™“#÷Œ n~ð$|¥Ië+Çãñ%’Á$2xCâsF×wH"¶’ÛË3?˜ëû™rî$ûˆÉçÒk>5–Ie²Ö4Û]íáhµI4Ù¯mÞW–HÙLñH±Æ¬cæ=_­w•òe—Åsr’Yë±k/§Ý¼iyog«ÈRHCe†'ó$‚Á‚Ka€qÉ>¸¿¼8è4/2°È"Ò2ÿ¿”›°Ô\¶GOñOY¼Ð>ëzƒùwKÄ’AO1Ö2ÊA0H=ˆÍø.;íâZèCÅ:–½§Ýxq55åÇœ¯/œD.rC“Ñ{â½™-4ìŠ( AEPEPEâŸYøG×zåüsÉkk³z@ ¹Üêƒ:°ï@â«[‹ß ê6öº×ö$'QÚÙлrF>PÃvFÜäŠàá~x{þ€%ÿÀ8ÿøåSÕ~4xOZÒî4Ûÿø¥í.Sd¨Ë/q¹%¡äd ¥Ì»—ì§ÙGË{¸ÓS¸‹YÕõMàÂúlÚ»3NùLÈà²+yg(=Õˆ9nê¼'þx;í_Øñ-ŸÚ¶yß¹ó7mÎß¿1Æ77OZï¼ñKGñ¦¹.“c§ê¶·1[“öØQ e^0äç,;v4]18I+´wQE2BŠó-Gã‡ôíZÿNm^ž[+™-¥x-£d.ŒTàù8îVÿ…ùáïúx—ÿãÿã”®‹TæÕÒñ[JÖR-K[Ò5ýrI­­ÛM°ÕÑ,â )3¸ÆdRP|¼1ÃØ/¤hWkðö™söï·ùÖ‘IöÏ+ÊûFP3gðîë·¶q^¯øÇáwŠ5FÔõxŠ{Æ@*Ãå¦í“N8Éç€WOÇ YZ$0øsÄöÐ TD±‰4Q€`È=œû3ÖèªzN¥³£Xê–ë"Á{oÄk ‚º†à‘œSW)V?Š|Igá]ë—ñÏ%­®Íé‚çsª @êýp?ð¿<=ÿ@ÿàür“iÊ[#ªøâ¥ðƒ®/Væ;{»‡[KIdV+¯œ9Â7¡ŸNvc©Éü Ôtë;Å6v:„÷¾^·-ÂÍq¸É,2#‘Ø–o-‰ï‘Èïø_žÿ ‰ð?þ9YúWÆèŸmþÎðljaûuÜ—·èÊÛæ|nne8ÎÚŽeÜ~Ê}™ìÔWžxsã‡â_ÙèvÚ^µmuw¿Ë{»tDù¹ÉOEì;Šô:d´Ó³ (¯<ñÆ-Ã^#¼Ðît½jæêÓg˜ö–èéó q‚\Üv4M»#Ðëçÿ‰pxÆÏÅúLjS[Õm¼%gwgÌzv©¶DFŽ0û# …l‘Ó"¶$Ž«þ燿èâ_üÿŽW}⟅–¹&³wàz[é%HÆßî0rщ¶‘“‘ódç94¹—r½”û3èj+Êã÷‡ ¾…âEU$ÚF÷ò½/IÔ¡ÖtkRÝdX/mã¸d0WPÀ3ƒêhM2\\wEÊ(®sÆ~3Óü¤Á¨ê6÷—Or¶È–ˆ®ûÙY‡ Ô÷ô¦#£¯?ø¿­ë:?„­ Юcµ½Õ5´ápÍ´Ä$W9Vþò»°$Œ“ÿ óÃßôñ/þÇÿÇ*ž«ñŸÂæ—q¦j~ñÍÂl–'² ?ïæAÈ 
‚)s.åû)ögEðzÿQ»ð\ÖºÅÝõΫ§jÙ^ÉB’)ª¸Ée‡,IÎàеèâºÅŸø[K]7Eð—ˆ­-—ضˆÅ˜õ,Í),z ’xt»/üRÑüi®K¤Øéú­­ÌVÆäý¶E(WŒ99ËÝLNJíÅQL¢¼ËQøãáý;V¿Ó›G×§–ÊæKi^ hÙ £8>gN;U¿á~x{þ€%ÿÀ8ÿøå+¢Õ9µtˆ|G ,ßm4?øÕ5 MÞút·¿aoem–$€€Ì<´Â’ ο®WŒÿÂáðoü$?ÛÿðŒx—ûOìŸbóþÌ¿êwïÛ·ÍÛ÷¹Î3ïW¤ý |5 fIt?¢¬Ö‘€?2‹ tæµiž‰7†´Pj“èšlº€uqvö¨Ò†\m;ÈÎFxÀ­J(¦@QEQEÉø§IÓto†ž'·ÒôûKL»vŽÖ‰Ky,2B€3€}…|Ác«ùVÑÿgß¶Ø•w,9rzWÕ~;ÿ’yâ_û]覯œ´Ÿx¦çG±¸·›GKoÆ$iwm*ÎÅpã±£æÒ¿sØÉýŸ<¹äÖÌí±ÿ@ÝGþüõèþÛô ÔïÇÿ^º/øB|]ÿ=ô?ûî_þ&øB|]ÿ=ô?ûî_þ&¼ßí?óGï=ÿöùù?ü˜|6½ûgÅï ÿ£\óí_ë£ÛœÛ¿Oʾ¢¯›¼¢jú7Åï ÿj½‹y¿lòþÊ\ãíœîÔ~µô{Z‘«EN/GÛÔùlË—ë2åm­7V{.EWAÂQEQEçÿäë¿öïÿ¥× WŸümÿ’C®ÿÛ¿þ”G@=ÿmúê?÷ãÿ¯GöØÿ n£ÿ~?úõÑÂâïùï¡ÿßrÿñ4Ââïùï¡ÿßrÿñ5óÿÚ8æÞ}¿û?üüŸþ¿Ìç¶Çýuûñÿ×®ïà•×Úþ)^IäM4W]³&Ò8ô¬oøB|]ÿ=ô?ûî_þ&¡Ò4ÿˆ:/®-|9ŒÚܺVKÀˈàó—$yØ]Û”Aá¸äu`ñ”ªÕåƒMù3ÎÍ=—Õß$äÝÖñ±õEàþÕþ4êWÚ‡Ÿk‚ˆÆ_ZÉo‘%Œ/'©Â‘òŒHË|m¨|m°Öôón‘2¼d'ö³Mìàùžb’å¾\r9 ^±ó'ŸëZÙ.ÿžúý÷/ÿ^.#FeI]yŸ[ö?W‡4äºE3þÛô ÔïÇÿ^ ¾ÕüÛ ˜ÿ³ï×tL»šy'=+©ÿ„'ÅßóßCÿ¾åÿâj¦­áÛh÷×hæ­äym.í¡I8Êã8¬á˜PrIJ?yµ_aÈíR[*>ð'ü“Ï Ø*×ÿE-tÏøþI熿ìkÿ¢–º ÷Œ<ÿãoü’wþÝÿô¢:ùïûlÐ7Qÿ¿ýzúãoü’wþÝÿô¢:ñßøB|]ÿ=ô?ûî_þ&¼üv&^v•ï»ô=¼›Ùûüòkm•ûœïöØÿ n£ÿ~?úômúê?÷ãÿ¯]ü!>.ÿžúý÷/ÿGü!>.ÿžúý÷/ÿ^öŽù£÷žçû?üüŸþ¿Ì>^ý³â÷†Ñ®aÙö¯õÑíÎmß§å_QWÍÞ Ñ5}â÷„ÿµ^ż߶ye.q‹vÎwê?ZúF½Œ-HÕ¢§£íê|¶eËõ™r¶Ö›«=—@¯š†¾—¯¼ÕüQ¥|^ñ·ü#i£·™ö?ûHJz[ü»vêÙϵUz”éÓrªí 4ªFªt¾#€ûˆèOñ/þ $£ì!ÿ¡?Ä¿ø,’½CþïŠóÇÁÿ÷ÍÏÿGü%ß?çƒÿþ*¼ÿ¯åÿóñ§·Ì?—ð<—S²×SJ¼i¼+â"9yeÓÝQÓ–bzÔšúcÀŸòO<5ÿ`«_ýµåž%ñGÄk ëßEáQi%”É9n<Í…m¹8ÝŒãq¸ß° Û»Æ3Ž3^¹ã¿ù'ž%ÿ°U×þŠjñ¿x—ǰxkJ†Î/ XìáXLË>ý]Ø8Î1œWÏñ¨éÃÙòïö„ó X/z¬ùnmÿ`üJÿžžüîÂì‰_óÓÂÏøUoøJ¾#Ï/ ÿß7ãGü%_¿ç—…ï›ñ¯–äÅv¦ë&þÓ,Ò7T÷Aþ±a'îªûžÉàOù'žÿ°U¯þŠZè+Ÿð'ü“Ï Ø*×ÿE-tú Ï?øÛÿ$‡]ÿ·ý(޹oì‰_óÓÂÏøWSñ·þI»ÿnÿúQq¿ð•|Fÿž^ÿ¾n?ƾkˆcUû?gË×âùl'™QÁ§/7ãoørÏöįùéá?Îçü(þÁø•ÿ=<'ùÜÿ…Vÿ„«â7üòð¯ýóqþ4ÂUñþyxWþù¸ÿù¾LWjaþ²`ÿçøÝ2ÃĶ_¼ÿ i'Û¼ìó'ks»vÿªãõî•óF¹}ñ\ñw‡¢‚M"ßYHï~Àö;×?¹ÌŠL™•SÆ äŽ£§ð.ñ§HûÚe±“ÍòöÿÂA%Î1»>_”í·¯9Æ~\t5öùB’ÁCž××m·cúÌ1?¾§.dúþ¸W…êv%½ø½ãOøG[I>Ãçÿh;Û»v}9öª~1ðçÆíK\ûL7¹¢PDÔ´ãФgyÎrOé^¾Ië¹ù-gðîÛ8|_î©Õæ}j¢Š+ì†x&¡xËSÖ|_7‡¤ÐVÑ|GzŽ50“~àN6 mÁ_|æ·áø¡ÿ=¼ÿ}\ÿñ5Îè¾%ñ~­ø¾ßÃñhmhÞ"¼w7ë)}û€8Ø@Æûç5³ÿ ßÄÏùáá/ûâçÿН±Ô~ÖÜÝL%žÒ ý“­fºáø¡ÿ=¼ÿ}\ÿñ5Ë|Eðï,< ©\ëRxlééåy¢Í§óy•Û¸cïœöÍtðüLÿžÿ¾.øªæ~ 
ø³Ç:Ÿõ=b/­„žWšlÖq/)wu9íššPʽ¤}š5Õ½zâ 5_³U¯}-ÞçÑ´QE{fáEPEP?ã¿ù'ž%ÿ°U×þŠjù6ÏÅ>#·±·‚ Wd1ƨ‰öxÎÕdŽx¯¬¼wÿ$óÄ¿ö ºÿÑM_)YxnÚ{ yšêì4‘+² €xâ½|£…ÅÎQÄÑURZ&ía¬±ãýÕíÝ'ùŽÿ„¿ÅôÿÉX¿øš?á/ñGýòV/þ&¥ÿ„ZÓþ~ïïàÿ ?á´ÿŸ»ßûø?½ßì £þ€Wþÿ¯õR_óîtÈè~êú¦«ñ{Ã_ÚWŸiòþÕåþéSnmß?t ô•}K_.|5Ò¢Ó>/xgÊ–i<ϵgÍ`q‹wéǽ}G_1˜áèáñ2¥BŸ$U­í¢‹Ô—„xOÜ4•»m®¡EW‚Š( Š( ¼ÿãoü’wþÝÿô¢:ô óÿ¿òHußûwÿÒˆèÿámüCÿ¡›ÿ$-ÿøŠ?ámüCÿ¡›ÿ$-ÿøŠÊÿ„ZÓþ~ïïàÿ ?á´ÿŸ»ßûø?¾ûû7ÿ@¿ù;ÿ3¿ûÝ¿ð¶þ!ÿÐÍÿ’ÿüEuÿùÏ^ÕíÕåâ>­íÕ!ÉO¤{wünÎê4£J I%ÙY|µñ;WÕ4¯‹Þ%þͼû7™ö_3÷Jû±n˜ûÀã©üëêZùsâV•§ñ{ÄÞl³Gåý—SœÛ§^=«|»G‰*ôùâïxí}ü¦«ñoØ$ûí¦§/ÿ Š?è3ÿ’±ñ4Â_âú ÿä¬_üMKÿµ§üýÞÿßÁþÂ-iÿ?w¿÷ð…}?öQÿ@+ÿÿ‚Wú©/ù÷º?äS¼ñOˆî,n ŸUß ‘²:}ž1¹HÁŽ+ë/É<ð×ý‚­ôR×ÊW¾¶‚Ââeº»-LÀ4ƒžx¯«| ÿ$óÃ_ö µÿÑK^oÂá'ᨪI­Rw¹/,xuÅ+öI~GA^MûAM-¿‚´‰à}“G­Bèøkå àõæ½f¼ŸöˆOà½,Mj%O#1Ê8¯.”T¦£%tÞÝÅÉÏî÷<3þÿÐgÿ%bÿâhÿ„¿ÅôÿÉX¿øš—þkOùû½ÿ¿ƒü(ÿ„ZÓþ~ïïàÿ ûì £þ€Wþÿ¿õR_óîtÈ‹þÿÐgÿ%bÿâk¿ø¨ßê/'Ôn~Ñ2è®üµL/Æä× ÿµ§üýÞÿßÁþÜ|·µÐþ$ê-%É[xô)&’k‰¢hòIàÏ5çæyV_†Ãº˜|*§+­S¸žC,ïœ"½OðGÑÔV~™®èúß›ý“ªØßù8ó>Ép’ìÎq¤ã8=} Zîc¨Á§^j¶6÷Óíòm¦¸D’MÇjíRrrAM|Ù'Êö»¬éž5ñLuÿÙámjíÊy(ùo0ŒåìSÿ„¿ÅôÿÉX¿øšÐÖôX5x¦i'¸—Z»\Fàõ„ú{ÕOøE­?çî÷þþð¯«ÁdÙel<*UÁ©É­]÷)pä±?¾Tàù»¥È‹þÿÐgÿ%bÿâj–­â=zÿLšÚ÷Ró­ßnøü„\áž V—ü"ÖŸó÷{ÿøUg@·²Ò¦¸K‹§dÛ…w°žõxœ“+§Fu)à”d“iólÒÑïÐ?Õ™Qýﳂå×D¯¦½µ(¢ŠùBŠ( «Ùß[ê4Ö²y‘¬²BNÒ0ñ»FãŸFVøãб^waáÏéšKÙx~Ö=.§}$‹ż b£)GávÚƒÌrp‰‘Ææ*3Ž3“Åsþ𠆹âÐõ¨<[¡ß6žºŒú´3o„¾Ã¸”£d”ŒõÎ03î>0ðÔ>0ðþƒ=Ä–évŠÈ(ÊÁÔàõ”dq‘žGZæü'áoEã&ñ/‹o´©®£Ò—LŒiêÿ½g˜d}ÀÙ`îè¸æý½_æ{+ÛTþg÷ðïÁíÃ^#³×-õ]jêêÏ”—wéó¡CØöèTQYÊNNòwd6Û» (¢‚Š( Š( ±üSá»?xrïC¿’xín¶ox]\`GU«bŠò¯øP¾ÿ¡ƒÄßøÿ¬¿üÑt/ÞjV÷¾1ÔçŽÎÒéIXDN2rN'¯i¬ýq5I4;ØôG‚=Mâd¶’vÚ‘¹à9ù[;zãiÎ1ÆsZûz¿Ìþöiíª3ûÏðŸÂí?]¼Ôôí]¼S£jZ”Ò@uh.£6ƱŽ~FÈÇsϋᅺ?ƒ5Éu{-GUº¹’ØÛ¶Ì’…•¸Âœ¨ïÜÔßtOh6V¾#“L™˜Æéqi4òÍ;…Úï3ÍÉ8XÀÆŒ®Ê”ªÔ’´¤ßÌR©9+6Š(¬È<þO„öÚ:…埊¼W§ý¾îKÉ¡±ÔQùŽrÄ(O ç'sGü*ÏúŸ|sÿƒþ½ŠùÿÆú~­á Ë/RñÍΛHís⸭RF`çda—`ü£æãîà©nÒ…ðë:4Oqã,¶àÉiy©€Á]yI¡ÁÁ÷cÇÞñ—‰^òÂÀøbçE¸· Z¤y¶²íui#e 7áønØ(Á-Üi6siÚ5•ÅÜ—“ÛÛÇ—2gt̪s’NIêzõ4i:l:6c¥Û´•¼vñ´„*Š€p=\¢ŠÇñO†ìü]á˽þI㵺ٽà`muq‚ATv®_þgýO¾9ÿÁÇÿa^Ey~¹à[?hwº½ÿÄ¥­¤M+ãY¶:*å@,Nä+Ÿøyá]SÅzv±6¯âïXÝXê³Xýžkw–!Ã6Òb ޝTñW†!ñf—„úŽ¥`‘Ü%À“OœDå“%A$a¾ª§µaü9ð=ç‚ÿ·¾ÙªO{öýAç‡Ì¸2~ïø]ò£÷Í“¼Œƒµ}(Ò~Yéž#ÓõɼGâ=RêÃÌû:jWÂt_1 722b:Jî(¢€ 
óß|ÐüKâ;ÍrãUÖ­n¯6y©ipˆŸ" è½Ïs^…E8ÉÅÞ.Ìi´î*ÿ… áÿú¢»‡>¼ðއ}o~ö?j¾Ô&¾xl¬oÚqç (ÇÇ8Éì)J­I+JMüÅ*“’³lñÿ |ÿ„í_ñZë‘yû?äÿbÎÜýþ_w^:cž¹â=sà :æ³myqã fâÑRE¿"ævPIJ’ŒÓƒ“Îq^ÉEfAäÐþϾ·Bk~"‰IÉTºFêOøP¾ÿ¡ƒÄßøÿ¯U¢´Uª%e'÷–ªÍ+&ÏšµŸ‡Zž•¨¼ø{Ä×6ïvm­&&µV¹äía’XeAb?…C€¤Žö_€š3ºï‰$CÕ^î2áå×Yuá[›ßŠV>&¸š94û 1ീÊû¢¹w!¤ ÷pc;IÎxpë)ºÕZ³“ûÆêÔjÎOï (¢²3 (¢€ (¢€9ÿÿÉ<ñ/ý‚®¿ôSWÈvßð}–?ývÅó?Ö}ìsÓŽµõçŽÿäžx—þÁW_ú)«å+-sM†ÂÞ'¹Ã¤J¬67í\ø‰Ê)r¦ýc'Ãѯ9*Õ# /´“û®ÑSþ)_óæÑÿ¯ùókKþ /þ~¿ò…ðióõÿÛü+“ÛUþI}ÿð þÍÀÿÐM/üòf×Â¯ì¯ø[ÞþÌÿ§Ÿ3ïÏ»ãï~5õe|¹ð×Pµ¿ø½áŸ³K¿gÚ·|¤c6ھ£®ê2r‚m[Ôù\Ê”)b¥ rRJÚÇm–Ú¿Ì(¢ŠÔá (¢€ (¢€ óÿ¿òHußûwÿÒˆëÐ+Ïþ6ÿÉ!×íßÿJ# ™ÿâ•ÿ>mñJÿŸ6´¿á Òÿçëÿ!·øQÿ —ÿ?_ù ¿Â¼ÏmWù%÷ÿÀ>ãû7ÿA4¿ðÿÉ™¿ñJÿŸ6½à?öü,Ûßìïõ?ØÒgï}ï:/ïsÓÅÂA¥ÿÏ×þCoð®óà…å½÷Å+ÉmäÞƒEu'sçGëõ­¨Tœ§iE¯_øcÍÍpXZ8w*U¡'u¤ROð“ü¢¨¢Ší>døçĿؿðœx£ûGýwöÍÞ>ÿÝó÷xëšÍÿŠWüùµ¹­ê–V>4ñLWls­]°IãÌ>ƒÚªÿÂA¥ÿÏ×þCoð® µj)´¢ßõè}n„©‡„§^œ[[4®½}åù¿ñJÿŸ6£¹ÿ„kì³yë¶7—þ³ïc޼u­oøH4¿ùúÿÈmþÍaq\åÞ&U’Aö¨j—^俯‘½\»¡&±ž—ÿ&}[àOù'žÿ°U¯þŠZè+Ÿð'ü“Ï Ø*×ÿE-téyÿÆßù$:ïý»ÿéDuó?üR¿çͯ¦>6ÿÉ!×íßÿJ#¯žá Òÿçëÿ!·øW.&r¹S~Ÿðǹ“a¨WçöÕ# ZÜÉ;ïµä¾foüR¿çÍ£þ)_óæÖ—ü$_üýä6ÿ ?á Òÿçëÿ!·øW7¶«ü’ûÿàçönþ‚ià+ÿ“6¾eÂÞðçögý<ùŸ{þ}ß{ñ¯«+åφº…­ÿÅï ýš]û>Õ»å#·|uÕõwQ“”jÞ§ÊæT¡K(S’’VÖ;l¶Õþa_)üUþÊÿ…½â?í?úvòþ÷üû¦~ïá_VWËŸµ [‹Þ&ûL»7ý—oÊNqn™è=è­'6•ý-¥ ¸¨Â¤”S¾²Ûg¾«ó8¯ø¥Ï›GüR¿çÍ­/øH4¿ùúÿÈmþÂA¥ÿÏ×þCoð®mWù%÷ÿÀ>«û7ÿA4¿ðÿÉ™7?ð}–o#ývÆòÿÖ}ìq׎µõç?äžxkþÁV¿ú)kå+ÝsMšÂâ$¹Ë¼Lª67$ƒí_VøþI熿ìkÿ¢–ºðó”“æMzŸ?œaèМU‘×ÙI}ölè+É?h_+þ}+ÏÿSý³™×îùrç§=+ÖëÉÿhèò¹Â&µ 1ô9kylÏ.’NqMÛSçÿø¥Ï›GüR¿çÍ­/øH4¿ùúÿÈmþÂA¥ÿÏ×þCoð¯7ÛUþI}ÿð´þÍÀÿÐM/üòfoüR¿çͯGøýŸÿ 6÷û;ýOö4™ûß{΋ûÜôÅq_ðióõÿÛü+¼ø!yo}ñJò[y7 Ñ]IÁùÑúýkj')ÚQk×þós\ŽÊ•hIÝi“ü$ÿ#èª(¢»O™>9ñ/ö/ü'(þÑÿ]ý³w¿÷|ÃýÞ:æ³â•ÿ>mnkz¥•ðûFͱ— .ã èEhÂÛ±ÿ¡[Å_ø/_þ.£ðWˆ¢ñ7ÆÉ¯a°Ô,•<8Ñ"åN@ñóuö5ëä”Üqiº2ŽW$ÿù˜×Ì£ˆ‡³R‹ôÿ‡=ŽŠ(¯²8œl¼]¡h!ñm®§}äNþ ¼‘WÊvÊ—9U#¨5£ÿ +Â?ôÿÉiøšvƒã›o ë>/²›EÖ¯YüG{(’ÆÔH€–ü½=ÅnÂÛ±ÿ¡[Å_ø/_þ.¾2¤Þ.£özþ\¯ó= Y¬iAS犷øsþW„è-ÿ’Òÿñ5Ÿ¯|Að½ï‡u;[}O|óÚKköyFæd •ÇS]wü-»ú¼Uÿ‚õÿâë/Ä¿,õ ëKá¿Â×SD$šÄ*!d#,wðy5ÍF‹ö‘ÿg’Õ}¸ÿò%K8Œ“\ñ×úîzŸ?äžxkþÁV¿ú)k ®ÀŸòO<5ÿ`«_ýµÐW臚yÿÆßù$:ïý»ÿéDuÁÂÊðý¿òZ_þ&»ß¿òHußûwÿÒˆëþÝý Þ*ÿÁzÿñuó|C/gjn{í$­·tîua±« {É+÷ÿ‡Fü,¯ÿÐ[ÿ%¥ÿâhÿ…•áú ä´¿üMoÂÛ±ÿ¡[Å_ø/_þ.ø[v?ô+x«ÿëÿÅ×Í{ 
Ð<¿ð8ÿòOöÔž?×ÌÀІŒÞ<Ø)®W-´NÏuÕ§ùê„ÕFÒ·}Š?ð²¼#ÿAoü–—ÿ‰£þW„è-ÿ’Òÿñ5½ÿ nÇþ…oà½øº?ámØÿЭâ¯ü¯ÿ_ì%ÿ@òÿÀãÿÈßÛQþxÿ_3‘×¾ ø^÷ú­¾§¾yí%Ž5û<£s2J㩯mð'ü“Ï Ø*×ÿE-yg‰~(YêÖ,—Ã~%…®,¦ˆI5ˆTBÈFXïà òkÔü ÿ$óÃ_ö µÿÑK_SÃðq§;Óp׫OòHäÄbÖ%¦¤»ó ¯0øás —‡¼9upû ƒÄÒHØ'jªHIÀç ¯O¯3øÓvº~á‹ÖŠY–ßÄv²˜á]ÎáVC…ÉǽºßÖ—јF\­>Çÿ +Â?ôÿÉiøš?áexGþ‚ßù-/ÿ[ßð¶ìèVñWþ ×ÿ‹£þÝý Þ*ÿÁzÿñuùß°—ýËÿÿ z_ÛQþxÿ_3þW„è-ÿ’Òÿñ5{áι§kÿn.´Ë>ðûFͱ— .ã èEhÂÛ±ÿ¡[Å_ø/_þ.£ðWˆ¢ñ7ÆÉ¯a°Ô,•<8Ñ"åN@ñóuö5ëä”Üqiº2ŽW$ÿù˜×Ì£ˆ‡³R‹ôÿ‡=ŽŠ(¯²8œl¼]¡h!ñm®§}äNþ ¼‘WÊvÊ—9U#¨5£ÿ +Â?ôÿÉiøšvƒã›o ë>/²›EÖ¯YüG{(’ÆÔH€–ü½=ÅnÂÛ±ÿ¡[Å_ø/_þ.¾2¤Þ.£özþ\¯ó= Y¬iAS犷øsþW„è-ÿ’Òÿñ5Ïx߯þÖ<aa¨ù×2ù{È‘s‰žJÐïÿámØÿЭâ¯ü¯ÿ\¿ÄOˆ¶šï5-6-_µy¼¬Mwf%Ĩß3n8éÇR+,±4ß°’÷–¼ñv×·(ç›Æ¤\9㮟֧ÐôQE~€yÁEPEP?ã¿ù'ž%ÿ°U×þŠjù£Lø‰=Ž“gh4O0ADí`nÚ gxé_KøïþIç‰ìuÿ¢š¾?·ÓµG¶‰ã±ÜŒ€«yÊ21Áë]ø §/Ì¤áŽæ²Õr¦õù&râ²ÕŽJ.Ÿ=½FŽÏþuÇý?òpñÂθÿ þNþ"¹ìÍ_þÿù?ÆìÍ_þÿù?ƽ?õ7†ÿéçÝ?þ@âÿUáÿ@Ïÿ&ÿ3Ñ| âySBpn<è¹â¹q¼;“`){|?>Þò•¬÷Þ)~'F#Ž~ÕQqó÷¿V}%EWŽwŸ)ÞøÆ_x·Åv‰¦}¨>¹w.ÿ´Æd#Ú}?ZOøY×ôÿÉÁÿÄV‰ìï®Ü0=6ŒôÅbÿfjÿôÿÈÉþ5ZþÇQ†ÊI'³òâËyªqÈì*«p¦ABœ«QöœñM«©ÚëU{Çk“O‡!Fj¯ÕÚåw¿½¥ºî}ÉEW‚zEPEP_‰tÙµŸ êú]»F³ÞÙMoHHPÎ…A8ã'Ð׈AðoÇÖöñ@—¾+Ë>p?¹_AÑ[ÐÄÕ Û¥+\Ö•z”[tÝÿ…Cñþ<3ÿgÿâ(ÿ…Cñþ<3ÿgÿâ+ß讟í\güüfÿÚ¯çgŒx;ág‹4鿝u¢µ­¹-$”¹ß'¤wëÙ袸êÕY¹Ôwlæ©RU$ç7vQEf@QEQEW/ñÃwž.ð&¥¡ØIwW^VLjA¶Ts’=ö®¢ŠðøT??çóÃ?÷öþ"øT??çóÃ?÷öþ"½þŠô?µqŸóñŸÚ¯çg€¡øÿ?žÿ¿³ÿñÓü8øqâO xÊã[Öît©"}=­lžBrdG†QÇÊ{úq^±Ee[‰­J“mÕÅ׫YÉ´QErç„j¸·–½ðÐY¡"Yó‚1ýÊúЧšc³¨ÊxüKVsf_†´Ù´o é]ÃFÓÙYCo#FIRÈIã#ÐV¥WÆrÿ<7yâïjZ„GuuålyØ„eG9 ÑOjòøT??çóÃ?÷öþ"½þŠè¡Š­‡¿²•®mGVýœ­sÀ?áPü@ÿŸÏ ÿßÙÿøŠ?áPü@ÿŸÏ ÿßÙÿøŠ÷ú+£ûWÿ?·ö†+ùÙãøYâÍÇzF¹«Ýh­kcçnKI%.wÄÉÀd©Çzöz(®:µgVnuÛ9ªT•I9ÍÝ…xÇŒ~x³XñÞ¯®iZ*Úßy;RîIC‘*rêsÚ½žŠ)U)©Óvh)Ô•9)ÁÙžÿ ‡âüþxgþþÏÿÄQÿ ‡âüþxgþþÏÿÄW¿Ñ]ŸÚ¸ÏùøÎŸí Wó³çÉþ øúâÞX÷ÃAdB„‰gÎÇ÷+Ûü5¦Í£xWHÒî6žÊÊy2J–D HÈ‚µ(®jøšµÚue{U¯R³N£¸WñK§Œü=ce¤MgͶ¡ÞnÙ•Tq•IÎX~¼×qEb›NèÍ6Ñàð¨~ Ïç†ïìÿüEð¨~ Ïç†ïìÿüE{ýßý«ŒÿŸŒëþÐÅ;<þÄùüðÏýýŸÿˆ®ŸáÇÃxcÆWÞ·s¥Iéíh«dò“"8$2Ž>SßÓŠõŠ+*ØìMhòT›hή.½XòÎM ¢Š+ç<#SøAãI|C¬ÞØ^hf¾Ô'»ŒO,ÛÀw$„Æq_­Wÿ…Cñþ<3ÿgÿâ+ßè®ÚyŽ*œT!6’:¡ÄB*1›Iÿ ‡âüþxgþþÏÿÄU{<¿³’Ö[ß 
„|d¤³ƒÁþyûWÐôSže‹œ\eQ´Ç,v&Qq”ݘQEÂrQ@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@Q@ÿÙhpcc-1.4.1/hpl/www/scalability.html0000644000000000000000000002153211256503657014133 00000000000000 HPL Scalability Analysis

HPL Scalability Analysis

The machine model used for the analysis is first described. This crude model is then used to first estimate the parallel running time of the various phases of the algorithm, namely the panel factorization and broadcast, the trailing submatrix update, and the backward substitution. Finally, the parallel efficiency of the entire algorithm is estimated according to this machine model. We show that for a given set of parameters HPL is scalable not only with respect to the amount of computation, but also with respect to the communication volume.


The Machine Model

Distributed-memory computers consist of processors that are connected using a message passing interconnection network. Each processor has its own memory called the local memory, which is accessible only to that processor. As the time to access a remote memory is longer than the time to access a local one, such computers are often referred to as Non-Uniform Memory Access (NUMA) machines.

The interconnection network of our machine model is static, meaning that it consists of point-to-point communication links among processors. This type of network is also referred to as a direct network as opposed to dynamic networks. The latter are constructed from switches and communication links. These links are dynamically connected to one another by the switching elements to establish, at run time, the paths between processors' memories.

The interconnection network of the two-dimensional machine model considered here is a static, fully connected physical topology. It is also assumed that processors can be treated equally in terms of local performance and that the communication rate between two processors depends on the processors considered.

Our model assumes that a processor can send or receive data on only one of its communication ports at a time (assuming it has more than one). In the literature, this assumption is also referred to as the one-port communication model.

The time spent to communicate a message between two given processors is called the communication time Tc. In our machine model, Tc is approximated by a linear function of the number L of double precision (64-bit) items communicated. Tc is the sum of the time to prepare the message for transmission (alpha) and the time (beta * L) taken by the message of length L to traverse the network to its destination, i.e.,

Tc = alpha + beta L.

Finally, the model assumes that the communication links are bi-directional, that is, the time for two processors to send each other a message of length L is also Tc. A processor can send and/or receive a message on only one of its communication links at a time. In particular, a processor can send a message while receiving another message from the processor it is sending to at the same time.

Since this document is only concerned with regular local dense linear algebra operations, the time taken to perform one floating point operation is assumed to be summarized by three constants gam1, gam2 and gam3. These quantities are flop rate approximations of the vector-vector, matrix-vector and matrix-matrix operations for each processor. This very crude approximation summarizes all the steps performed by a processor to achieve such a computation. Obviously, such a model neglects all the phenomena occurring in the processor components, such as cache misses, pipeline startups, memory load or store, floating point arithmetic and so on, that may influence the value of these constants as a function of the problem size for example.

Similarly, the model does not make any assumption on the amount of physical memory per node. It is assumed that if a process has been spawned on a processor, one has ensured that enough memory was available on that processor. In other words, swapping will not occur during the modeled computation.

This machine model is a very crude approximation that is designed specifically to illustrate the cost of the dominant factors of our particular case.


Panel Factorization and Broadcast

Let us consider an M-by-N panel distributed over a P-process column. Because of the recursive formulation of the panel factorization, it is reasonable to consider that the floating point operations will be performed at matrix-matrix multiply "speed". For every column in the panel a binary-exchange is performed on 2*N data items. When this panel is broadcast, what matters is the time that the next process column will spend in this communication operation. Assuming one chooses the increasing-ring (modified) variant, only one message needs to be taken into account. The execution time of the panel factorization and broadcast can thus be approximated by:

Tpfact( M, N ) = (M/P - N/3) N^2 gam3 + N log(P)( alpha + beta 2 N ) + alpha + beta M N / P.


Trailing Submatrix Update

Let us consider the update phase of an N-by-N trailing submatrix distributed on a P-by-Q process grid. From a computational point of view one has to (triangular) solve N right-hand-sides and perform a local rank-NB update of this trailing submatrix. Assuming one chooses the long variant, the execution time of the update operation can be approximated by:

Tupdate( N, NB ) = gam3 ( N NB^2 / Q + 2 N^2 NB / ( P Q ) ) + alpha ( log( P ) + P - 1 ) + 3 beta N NB / Q.

The constant "3" in front of the "beta" term is obtained by counting one for the (logarithmic) spread phase and two for the rolling phase. In the case of bi-directional links this constant 3 should therefore be only a 2.


Backward Substitution

The number of floating point operations performed during the backward substitution is given by N^2 / (P*Q). Because of the lookahead, the communication cost can be approximated at each step by two messages of length NB, i.e., the time to communicate the NB-piece of the solution vector from one diagonal block of the matrix to another. It follows that the execution time of the backward substitution can be approximated by:

Tbacks( N, NB ) = gam2 N^2 / (P Q) + N ( alpha / NB + 2 beta ).


Putting it All Together

The total execution time of the algorithm described above is given by

Sum(k=0,N,NB)[Tpfact( N-k, NB ) + Tupdate( N-k-NB, NB )] + Tbacks( N, NB ).

That is, by considering only the dominant terms in alpha, beta and gam3:

Thpl = 2 gam3 N^3 / ( 3 P Q ) + beta N^2 (3 P + Q) / ( 2 P Q ) + alpha N ((NB + 1) log(P) + P) / NB.

The serial execution time is given by Tser = 2 gam3 N^3 / 3. If we define the parallel efficiency E as the ratio Tser / ( P Q Thpl ), we obtain:

E = 1 / ( 1 + 3 beta (3 P + Q) / ( 4 gam3 N ) + 3 alpha P Q ((NB + 1) log(P) + P) / (2 N^2 NB gam3) ).

This last equality shows that when the memory usage per processor N^2 / (P Q) is maintained constant, the parallel efficiency slowly decreases only because of the alpha term. The communication volume (the beta term) however remains constant. Due to these results, HPL is said to be scalable not only with respect to the amount of computation, but also with respect to the communication volume.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/software.html0000644000000000000000000001030511256503657013461 00000000000000 HPL Software

HPL Software

Download and Installation

  1. Download the tar-gzipped file, then issue "gunzip hpl.tgz; tar -xvf hpl.tar" and this should create an hpl directory containing the distribution. We call this directory the top level directory.
  2. Create a file Make.<arch> in the top-level directory. For this purpose, you may want to re-use one contained in the setup directory. This Make.<arch> file essentially contains the compilers, libraries, and their paths to be used on your system.
  3. Type "make arch=<arch>". This should create an executable in the bin/<arch> directory called xhpl. For example, on our Linux PII cluster, I create a file called Make.Linux_PII in the top-level directory. Then, I type "make arch=Linux_PII". This creates the executable file bin/Linux_PII/xhpl.
  4. Quick check: run a few tests (assuming you have 4 nodes for interactive use) by issuing the following commands from the top level directory: "cd bin/<arch> ; mpirun -np 4 xhpl". This should produce quite a bit of meaningful output on the screen.
  5. Most of the performance parameters can be tuned, by modifying the input file bin/<arch>/HPL.dat. See the tuning page or the TUNING file in the top-level directory.

Compile Time Options

At the end of the "model" Make.<arch>, the user is given the opportunity to override some default compile options of this software. The list of these options and their meaning is:

-DHPL_COPY_L force the copy of the panel L before bcast
-DHPL_CALL_CBLAS call the BLAS C interface
-DHPL_CALL_VSIPL call the vsip library
-DHPL_DETAILED_TIMING enable detailed timers

The user must choose between either the BLAS Fortran 77 interface, or the BLAS C interface, or the VSIPL library depending on which computational kernels are available on his system. Only one of these options should be selected. If you choose the BLAS Fortran 77 interface, it is necessary to fill out the machine-specific C to Fortran 77 interface section of the Make.<arch> file. To do this, please refer to the Make.<arch> examples contained in the setup directory.

By default HPL will:
  • not copy L before broadcast,
  • call the BLAS Fortran 77 interface,
  • not display detailed timing information.
As an example, suppose one wants this software to copy the panel of columns into a contiguous buffer before broadcasting. It should be more efficient to let the software create the appropriate MPI user-defined data type since this may avoid the data copy. So, it is a strange idea, but one insists. To achieve this one would add -DHPL_COPY_L to the definition of HPL_OPTS at the end of the file Make.<arch>. Then issue a "make clean arch=<arch> ; make build arch=<arch>" and the executable will be rebuilt with that feature enabled.


[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/hpl/www/spread.jpg0000644000000000000000000004636311256503657012736 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀÞ¡"ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú(¢€ ÏþÝÑÿ¶?²?µl´ÿçËí ç}Ýßs;¾ï=:sZó¥õ¢Åñ4X_é×mâ‰üeo} ò#HdÓb¥$@>aÆýÂèº(¢€ (¢€ Žyáµ·–ââXá‚$/$’0UE$’xsRWñCDÓµ¿‡šÇö¿öI¯mþv]“$Oµ¸#8Éàä{PQa}o©éÖ×öry–·Q$оÒ7#Tàò2ëV+Ÿð'ü“Ï Ø*×ÿE-tQEQE—‰tPépkzlº€vCh—HÒ†\îÎFF8Á­JùóO¾øwã]#EÒáÓt{M+XÚxc’{½Fe`#PŒ«vnZF$*ðƒ‘ôQEQEgêzî¢y_ÚÚ­‡Ÿ/íw üc8ÜFq‘ÓÔUÈ'†êÞ+‹ycš PÂKör|§f%ŽöÌŸ78sóÐãÚQEQEV|Úæ™¹m¢IyÔ®biâ¶,c^¬q÷G¦qœgâ~‹ÄQü`Ò¯uÿ ÿgÞj/¨L÷&ò KG寍ƒËíR5$’ìÜr(Û(¢Š(¢Š*ž¥«iº5ºÜjš…¥Œ áK©–%-‚p 3€N=\¯3øÓy èÖ>#ÔîôÛ 55‘î ´i†92¤ŽØ sÎÒèn­¦ë6íq¥ê—Ð+”i-fYT6Á*HÎ8÷r¼Ïàb,^¸·#{85;ˆí/VÙ¡7Ñ13É$å}‚ꦽ2€ (¢€ (¬¿Ã}sá]^ ,È5 ,¦KS›JP„Ãdm;±ÎF(K]wG¾ÔgÓ¬õ[‹è7yÖÐÜ#ÉÓµ·(9$ž†´+æ„z~¦|EÐNiîõ)-.¢Öm¯,L‘3†Vè¹8<žX ¿KÐEPEP^¥â]F¸[}S[Ólgd±Ý]$LW$d ã Œû¹cg©ÙÇyawݬ™Ù4 °H8aÁÁ~ÁüXŽÚ.ÁàÐôkÝkT½‡I´»ÔmRQldÜCr¬H™làòq¥iV:—o¦i–ÑÛYۦȢNŠ?™$ä’y$’rM\¢Š(¢Š(¯¬¾6øîúÑ.b±ðàGÎE>x8ÿžžÕôM|wáßù[À¿ô#YU›ŒnŽì¿ õ\gµ¿Èô/ø\^>ÿŸ? 
ß©ÿøº?áqxûþ|ü5ÿ~§ÿâë‘¢¹ý¼Ï_û/ÙýçO{ñ·Çv6s-‡ &2)óÉÇüô÷¯¢kã¿È çþÿ¡ ûº)MÊ7g‘˜aáBªŒ6·ù…pþ7ñ'‰4ÏøoCðÔzS]jÿjËêK!Eò‘_ªŒ‚Ýn•ÜWŸø·þJ÷ïû‰ÿé:Ö§Åßÿ©ÿ&èÿ‹¿ÿR7þM× Q@ÿþ¤oü›«ñ'‰5?ø“Cñ,zRÝieÃé« FóQŸ«œœ½‡~µÜWŸøKþJ÷Ä_û†é;P QEäþñ'Å?xrÓ\°Á±ÚÝoØ“­ÐqµÙ@$uSÞ¶?âïÿÔÿ“t|ÿ’C¡ÛÇþ”I^@ÿþ¤oü›¬ø“⟄|9w®_ÇàÙ-mvoHè¹Üêƒ:°ï^±^ñ·þI»ÿnÿúQzQ@?üIâM3Ä~Ðü5”×Z¿Ú²ú’ÈQ|¤Wê‡# ·cÛ¥Wÿ‹¿ÿR7þMÑâßù+ß¿î'ÿ¤ë^@ÿþ¤oü›£þ.ÿýHßù7^Epþñ'‰5?ø“Cñ,zRÝieÃé« FóQŸ«œœ½‡~µÜWŸøKþJ÷Ä_û†é;W P^Oáo|Sñw‡-5Ëü­Öý‰:Ý]äGU=ëÖ+Ïþ É!пíãÿJ$ þ.ÿýHßù7Gü]ÿú‘¿òn½ŠòýGÄŸü9¨è_Û‘øQìu-VßOo°­Á‘|ÂrFòàyçW¨WŸüSÿ™+þÆ»ýž½€ â|O­øµ|]g øY4MòXIy+ê‚\adT˜Ïûc¨õæ»jãæÿ’ÉgÿbüÿúQ gÿÅßÿ©ÿ&èÿ‹¿ÿR7þM× Q@ÿþ¤oü›­‡ž$Ö É!пíãÿJ$¯@ ?ÿ‹¿ÿR7þMÖ~£âOˆþÔt/íÈü(ö:–«o§·ØVàȾa9#yp<óŽ+Ô+Ïþ)ÿÌ•ÿc]þÏ@EPŸø“Äž0ÿ…‡…ü/‡ÿ ¡¨<ššÍÿ=LdÿwŒzóGü]ÿú‘¿ònù¸_û•?öî½€<ÿþ.ÿýHßù7Gü]ÿú‘¿òn½Šãþx“Xñ¬nGb—Ún«6žßaW·–$o$žIçŽ1Åvçÿ ?æuÿ±®ûÿd¯@ Š( ¾;ðïü€­¿à_ú¯±+â½MûF• ¿n½‹vï’)v¨ùAŠçÄÊ1‚rî{9 µ±%wnöꎒŠÉþÇ?ôÔïÿÿZìsÿA=Gþÿÿõ«‡ÛRïøSý›þE÷¡Þ"ÿÏüÿBö%|W¬é¿gÒ¦—íײíÛòK.å?0ê1_f%õ¼šŒÖ &n¡Š9¤M§åG.ç§&7ü½Åwa¥A¸÷>[<¡VŽ"1ª¬íÞýYb¼ÿÅ¿òW¾ÜOÿIÖ» c[Ó´4ºÔ®<˜ä• Œ*4,Œp¨ˆ ³±ôPOô¼_ã³­sà][BÕfHgKׂòÊS¶0pG*z©‘È=ëyIE9>‡•F”«T(o&’õzñE|Ÿý£âú€V´±4ê¾XœXü—¦ªÖJÍÛG×WúÙEVç’yÿÁ/ù$:ý¼éD•èáÿ |ý·ðãIÔá-ñ]‡ç£Xj^T)‰~UÚqœdû“]‡ü*ÏúŸ|sÿƒþ€=¼ÿãoü’wþÝÿô¢:?áVÔûãŸüöÇüRðö'Ã[Qÿ„·ÅwþO“þ©y°¾fEù—hÎ3‘î{…Q@âßù+ß¿î'ÿ¤ë^^_ñ#JþÛøà;í÷ÖwöúM„ÞTɈQ¾VÁÆqƒìMh¬ÿ©÷Ç?ø8ÿì(Ð(¯?ÿ…YÿSïŽðqÿØQÿ ³þ§ßÿàãÿ° Â_òW¾"ÿÜ3ÿIÚ½¼¿á¾•ý‰ñÇÚwÛï¯üŸìïô›ù¼ÙŸ0»|ÍœgØ õ +Ïþ É!пíãÿJ$¯@¯ø[àí¿‡N£ÿ oŠì<ï;ýÃRò¡LLëò®ÓŒã'Üš÷ +ÏÿáVÔûãŸüö¬ÿ©÷Ç?ø8ÿì(ø§ÿ2Wýv?û=zx¿Œ<ÿæ£àëÏøI¼Gªoñ-”^N§çƹ,wÚ0Üc>„ú×´P\|ßòY,ÿì_ŸÿJ!®Â¼ßÅ:ü$ôû?í}WKÙ¢M/¦\ù6'Œm-ƒ•ç8õÒ€="ŠóÿøUŸõ>øçÿý…ð«?ê}ñÏþ?û ô óÿ…Ÿó:ÿØ×}ÿ²Qÿ ³þ§ßÿàãÿ°ªÿm~çx²ÏíÜy%¼‹Î¸}òI´F7;wcŒ“ÜФQEçÿ¿äè_öñÿ¥W W‡ü-ðößÃ'Qÿ„·Åvwþa©yP¦&uùWiÆq“îMvð«?ê}ñÏþ?û ô óÿŠó%Ø×cÿ³Ñÿ ³þ§ßÿàãÿ°®_Æ ÿ„sQðuçü$Þ#Õ7ø–Ê/'S¿óã\–;‚ín1ŸB}hÚ(¢Šóÿù¸_û•?öî½¼ŸÄšü$c³þ×Õt½žùÚeÏ‘#b䥰r¼ç zVÇü*ÏúŸ|sÿƒþ€=ŠóÿøUŸõ>øçÿý…ð«?ê}ñÏþ?û >Ìëÿc]÷þÉ^^oðv×ì:w‹,þÑ=Ç‘â[ȼë‡ß$›Dcs·v8É=ÍzEQEñχÿämÿÿÐ}_hw¶èÐ$·P£ÙV>c\XôÝ5nÿæ}G Ô„1²sv÷^þ±6¨ª¿ÚV?óûoÿWühþÒ±ÿŸÛûú¿ã^G$»¡}j‡ó¯½¼Aÿ ;Ÿøþ„+è­kÃúÇü&·Úžƒe´÷VVÈÚŠË$”i¼ÈÙš˜’÷0DcæBŸ›µËÛI´k„Šês· 
²OÌ+èßøUŸõ>øçÿý…zøÕ7~ÿä~{Å•!où,–ö/Ïÿ¥×a\±«iº7ÅËSP´±´)Ñdº™bRÞ|G±8ãØÐyEsÿðø?þ†½ÿ0ÿñTÂwàÿúô?üÃÿÅPA^ð³þg_ûï¿öJè?á;ðý zþ aÿâ«›øI<7Vþ0¸·–9 —ů‘°eu! Ž#œÐ¢QEçÿ¿äè_öñÿ¥W W“ü ñg†ôÏ…º5ÿˆ4«K¨üýðÏ{n¹žB2¤ädî?á;ðý zþ aÿ⨠¯?ø§ÿ2Wýv?û=tðø?þ†½ÿ0ÿñUÅüBñ.ƒ¬Üx2ßKÖôÛé×Å.ÑÚÝ$¬.2B’q’}Åz¥Q@ÿ7 ÿr§þÝ× W™êZ¶›£|}[SP´±¼.dº™bRßj'±8ãØ×Yÿ ߃ÿèkÐÿðcÿ@ÏÿÂwàÿúô?üÃÿÅQÿ ߃ÿèkÐÿðcÿ@ÿÂÏùìk¾ÿÙ+Ð+ÎþO Õ¿Œ.-åŽh%ñEëÇ$l]HB#‚ç5è”QEWÎ_´÷ÀúuÅÖ“c<ïæî’[dflHÀd‘ž€Wѵà_ äŸéŸö×ÿF½xE9C oy~Lô²¸©Vi®Ÿª5áðÿý´Ïüü(ÿ„_ÃÿôÓ?ð?ð­j+ã>±WùŸÞÏ{Ùò8Oˆ:eà}Fâ×I±‚tò¶É²+.dPp@ÏBkèÚð/‰_òOõ?ûeÿ£R½ö¾Ï‡g)ádäïï?ÉiÉ%Óõa^oã» =Oâ—ÃÛ;ûH.ídþÒß ñ‰±‘•<á^‘^âßù+ß¿î'ÿ¤ë^ùæü žÿ¡SCÿÁt?üMð‚x?þ…MÿÐÿñ5ÐQ@ÿü žÿ¡SCÿÁt?üMrþ°³Ó>)|B³°´‚ÒÖ?ìÝÁ018QÀÉ$þ5éåvÚÒøwÇ?õ‡‚IÅ•¾Ÿ7”‰r-›åž­Œ(É<@©EGËsoè$ "Q$lŒåX§Ø€Gzçìü]¡ãY´ HcžÞyZKÔœ·4bH6cª¬Ñ’ÙÆI^ àžh:=ïôë‹­&ÆyßÍÝ$¶ÈÌØ‘€É#=®—þÿÐ LÿÀHÿ²¾ÿÉ?Ó?í¯þzêëóŒuz«U)?‰õógÕaéÁуil¿#'þÿÐ LÿÀHÿ¹¯ˆ:eà}Fâ×I±‚tò¶É²+.dPp@ÏBk»®SâWü“ýOþÙèÔ£^«ÅRNOâ]|Ðb)ÁQ›Ilÿ#ßh¢ŠýùSÍüwag©üRø{giݬŸÚ[áž1"6 R2§ƒ‚ü+¨ÿ„Áÿô*hø.‡ÿ‰®Å¿òW¾ÜOÿIÖ½€9ÿøA<ÿB¦‡ÿ‚èøš?áðý šþ ¡ÿâk ¢€<ßÀ–zgÅ/ˆVvZZÇý›²#¢æ' 8$ŸÆ½"¼ÿÂ_òW¾"ÿÜ3ÿIÚ½€ ò„ðÞ§ð·F¼¿ðþ•wu'Ÿ¾i죑ÛÈXŒœá^±^ðKþI…ÿoúQ%tð‚x?þ…MÿÐÿñ4 àÿú4?üCÿÄ×AEy_Ä/ h:5ǃ.4½M±¼Qb%­ªDÅrç¨ãØWªWŸüSÿ™+þÆ»ýž½€ àõ'MÖ~.XÛêš}¥ô ¡NëÔ+*†óâÁ#>æ»Êãæÿ’ÉgÿbüÿúQ h àÿú4?üCÿÄÑÿ'ƒÿèTÐÿð]ÿ]Ïÿ àÿú4?üCÿÄ×7ð’mmüaoopÁŠ/R8ãPªŠ¯D¯?øYÿ3¯ýwßû%zQ@OðƒÂ~Ôþè×—þÒ®î¤ó÷Í=”r;byË“€ü+¸ÿ„Áÿô*hø.‡ÿ‰®à—ü’ þÞ?ô¢Jô çÿáðý šþ ¡ÿâk‹ø…á­F¸ðeÆ—¢i¶3·Š,Q¤µµH˜®\à•ã { õJóÿŠó%Ø×cÿ³Ð QEæz–“¦ë?VßTÓí/ _ ‡Xî¡YT7ÚˆÈ Î ÷5Ö àÿú4?üCÿÄ×?ÿ7 ÿr§þÝ× P?ÿ'ƒÿèTÐÿð]ÿGü žÿ¡SCÿÁt?üMtPü$‚[[ÛÅ0Eâ‹ÔŽ8Ô*¢€€Æ+Ñ+ÏþÌëÿc]÷þÉ^@Q@|ÃàøwGð}……þ£äÜÅæoO"FÆdb9 GB+éêò¯ƒßòJôoûoÿ£ä¯âjÔéa#*‘ms-º?'ùx:²¥Q¸ö9øY^ÿ ·þKKÿÄÑÿ +Â?ôÿÉiøšö +áþ¿„ÿŸRÿÀ×ÿ z_]­å÷?ó>~ñ¿ü;¬x>þÂÃQó®eòö'‘"ç)<• 5ëßð»~ÐÃÿ’Wün²¾0ÿÉ+Öí‡þ޽V¾ã†kS«„”©Å¥Ì÷wè¼—äy¸Ê²«Q9v8{‹þÔõk =w̺º•!…>É8Üì@Q’˜$u¬‰ÞáψþÕµk³ØÁý£æK±ŸnèQG <8¡ñOþd¯ûìöz<[ÿ%{á×ýÄÿôkèŽ@ÿ…Ûðóþ†ü’¸ÿãtÂíøyÿCþI\ñºô (Ïÿávü<ÿ¡‡ÿ$®?øÝsþ¸Ó¼}â_Š?Ù7Û¬u[K;hͼ‘–ØÛO<gìçþÿ’½ñþáŸúNÔÉϬü5Ò¼U-ΩâhƱizdºXôùêùpì­ çÌ-þÙ?5`XüSð>ãS}igâcd—[²ÞË8ºi™Yäd–Q°nV8ù·— ÁYEz_ÂÏùìk¾ÿÙ+R†^ ºñºåLJí&¿•ËÈÒnh݈Á&"vzço^zó@àøwGð}……þ£äÜÅæoO"FÆdb9 GB+¡ÿ…•áú 
ä´¿üMu¿ä•èßößÿGÉ]Å~c˜cpÑÅÕŒ©Éµ)}¥Ýÿqþg³KZ4â•´K£ÿ3ÇÿáexGþ‚ßù-/ÿ\÷üoáÝcÁ÷ös/—±<‰8‘I䨯 k‡øÃÿ$¯Yÿ¶ú>:2ünXºQ9&å´»¯î/Ì*âëJœ“¶©ôæz­ϯˆfOêzTö3­¥­¥´ÑMFC#HÓÂ;vÊŽU³Á\æøŸÅ­½Å•žBy๸y/¬ndÂÂF¡Îæ•>n€áŽ~œxÆÄoNðçÄêÚµÇÙì`þÑó%ØÏ·t(£…žH Ðÿ…Ûðóþ†ü’¸ÿãuâ{Á¨øÿáꘈ¸‚þ`arèw[¡ùX€Hç‚@Ï é]¥|ÞqÄÙµÕgÍu{ÞÝZìûS¥Î¯sœÿ…Ûðóþ†ü’¸ÿãtÂíøyÿCþI\ñºèè¯+ýtÿ§ù7ÿj_ÕüÎ3á¾·§xâ?µm&ãí3ÿgyrìdݶSÃG ŽEz…yÇäª|Bÿ¸oþˆjôzû,-ooB­nd½UÎy+6‚¼?áoÅ/øsáÆ“¤êÚÏÙï ó¼È¾Ë3íÝ3°åPƒÁƒ^á^ðKþI…ÿoúQ%n ÿ…Ûðóþ†ü’¸ÿãtÂíøyÿCþI\ñºô (Åüañ¾.Ô|a¡ê¿kºÄ¶S2}žXð€°'. uaù×´WŸüSÿ™+þÆ»ýž½€ óø§FðÅ=>ÿ\¼û%¬š$ЫùO&\Ï è§ò¯H®>où,–ö/Ïÿ¥Ðü.߇Ÿô0ÿä•Çÿ£þoÃÏúòJãÿ× Q@ÿ ·áçý ?ù%qÿÆê¿ÁÛë}ONñeýœže­×‰o&…ö‘¹FTàò2ë^‘^ð³þg_ûï¿öJô (¢€"xWÅÚƒ¬4=WíwQø–ÊfO³ËåÔ¬?:öŠóÿŠó%Ø×cÿ³Ð QEäþ$ñNáޱßë—Ÿdµ“ÃB)äË›’@Âz)ü«cþoÃÏúòJãÿÑÿ7 ÿr§þÝ× PŸÿÂíøyÿCþI\ñº?ávü<ÿ¡‡ÿ$®?øÝzæÿo­õ=;Å—öry–·^%¼šÚFäaSƒÈÈ#­zEyÿÂÏùìk¾ÿÙ+Ð(¢Š(¯*ø=ÿ$¯Fÿ¶ÿú>JõZùÇáÿµí#Àúu—„¾ßo›²çûI"ß™Ÿ”©#‘øWÍñF¦#Sµù“Õ¥Ñ÷hkGïÖ’ŠÛWcÜh¯4ÿ…‘âúÿò±ÿGü,Їÿ•ˆ¿øšøOì|_hÿàpÿä‡ý­€ÿŸÑûѧñ‡þI^³ÿl?ô|uêµóÄkÚ¿õß }‚Þ_+}Ïö’K³)(PNHñ¯£«îø_ Sƒ”*ZüÍèÓè»6'ˆ£ˆ÷èÉIm£¹çÿÿæJÿ±®Çÿg£Å¿òW¾ÜOÿIÖŠó%Ø×cÿ³ÖÄ‹½FÇâ?€nt/ûRù?´|»?´,fa@~và`yëŒw¯¤êWŸÿÂ[ñþ‰‡þWíÿÂøK~!ÿÑ0ÿÊý¿øP WŸøKþJ÷Ä_û†é;Qÿ oÄ?ú&ù_·ÿ Ïøow¨ß|Gñõέ¥ÿeß?öw™gö…ŸËÄ.μ€3ŽÔ¡ð³þg_ûï¿öJô óÿ…Ÿó:ÿØ×}ÿ²W P•|ÿ’W£Ûý%wä_ õ_[|>ÒâÒ|ý¥b¾o—wý­ >gï\Ÿ‘†FG=qšë?·>!ÿÑ5ÿÊí¿øWæ˜þÌjâªÔ…;§)5¬vmù‘«›;áþ0ÿÉ+Öí‡þާþÜø‡ÿD×ÿ+¶ÿá\ŸÄÍWÆW?µHµofØ·•æ]ÿkC7—ûÔ#äQ““ÇLæŒæ4±TªN’”[Ö;&¼ÂU`âÒgI¨x·CÕ¯^ïRø_ã Ù4–çEóP„.Pìg+‘æ>ùˆÎ+ŽøâÍ&ÇÃÐe|=ÕtlÊð?Û¬¥Óí¤ŽTb"TÞÍåGÃd‡Ò¾‡¢¿K8Ï™´¯‰Þ&ñW‚&›Ã ÓZîÚ$ÒÓÌ^%P‘£¨#]…²äsÀÀ¯x‚F–äx^u ÑHAd$}Ó´‘‘Ó‚G¹®7â|ú·Äˤé¿ÚWËý¡åÚyë™û¤çnO=qŠ—ûsâý_ü®Ûÿ…|_e8¼n&3ÃÓºQµî—W¦­jF*ÍÇn|Cÿ¢kÿ•Ûð£ûsâý_ü®Ûÿ…|÷ú·šϯüš?æmí¡ÜŸÀ¿òU>!Ü7ÿD5z=ygà õŸˆ><—VÓ³o›û?Ì´óÖo/÷NμŒ:gêuúV”©aiSš³QŠ~©#ŽNòmyÿÁ/ù$:ý¼éD•èóç߈zö…à-3M²ðwö…¼>n˯í4‹~ev?!RF #¯lÖõ*š½I$¼ô1«ZÍVJ+ÍØúŠò_ø[^(ÿ¢{ÿ•¨¿øŠ?ámx£þ‰ïþV¢ÿâ+/®á¿çä~ôsÿh`ÿçìð%þf×Å?ù’¿ìk±ÿÙëÐ+ÁüCãgÄÚǃìµ eBž#²”Oý “î`ÄmÚª1ßoz÷ŠÚ!QsA¦¼ŽŠuiÕ59&¼Â¸ù¿ä²YÿØ¿?þ”C]…y¿ŠugLø§§Í¡è?ÛWM¢LoöĶڞ|d¾ç8 {ûUš‘Eyÿü%¿ÿè˜å~ßü(ÿ„·âýü¯Ûÿ…zyÿÂÏùìk¾ÿÙ(ÿ„·âýü¯Ûÿ…Wø;5ÅÆâɯ-~Éu'‰ok0Iå9–MÆÁÈÈëŠôŠ(¢€<ÿà—ü’ þÞ?ô¢Jô 
ðÿ…¾"ñ•Ã&ÛIð'ö¥ŠyÞ]çö¼0y™™Éùd`’9ëŒ÷®Ãþ߈ôL?ò¿oþèçÿÿæJÿ±®Çÿg£þ߈ôL?ò¿oþËøÃ\ñV§¨ø:sÁߨ¶«â['[í8®w>XÚƒ# “ŸozöŠ(¢€<ÿþnþåOý»¯@¯'ñ&£¬éŸc›Cж®›ÃAßí‰mµ>ÒI}Î0p@÷ö­øK~!ÿÑ0ÿÊý¿øP Q^ÿ oÄ?ú&ù_·ÿ ?á-ø‡ÿDÃÿ+öÿá@ÂÏùìk¾ÿÙ+Ð+ÍþÍqq§x²kË_²]Iâ[ÇšßÌyNDe“pá°r2:â½"€ (¢€ ùÃáÿü‰wýµÿÑ_G×ÎÿäHÓ¿í¯þŒjðóÿ÷hÿ‰~Lù¾(ÿsø—äΚŠ(¯>æ~ È‘¨ÿÛ/ýµô}|áñþDGþÙèů£ëëò÷i‰þHû¾ÿs—øŸä?ø§ÿ2Wýv?û=-ÿ’½ðëþâúN´|Sÿ™+þÆ»ýžÿÉ^øuÿq?ý'Z÷¤=Š( ¼ÿÂ_òW¾"ÿÜ3ÿIÚ½¼ÿÂ_òW¾"ÿÜ3ÿIÚ€…Ÿó:ÿØ×}ÿ²W WŸü,ÿ™×þÆ»ïý’½€<ÿà—ü’ þÞ?ô¢Jô óÿ‚_òHt/ûxÿÒ‰+Ð(¯?øÛÿ$‡]ÿ·ý(޽¼ÿãoü’wþÝÿô¢:ô (¢€<ÿÅ¿òW¾ÜOÿIÖ½¼ÿÅ¿òW¾ÜOÿIÖ½€ (¢€<ÿÂ_òW¾"ÿÜ3ÿIÚ½¼ÿÂ_òW¾"ÿÜ3ÿIÚ½€ ùÃáÿü‰wýµÿÑ_G×ÎÿäHÓ¿í¯þŒjðóÿ÷hÿ‰~Lù¾(ÿsø—äΚŠ(¯>ÇÖä7áû¬ÿô#_@×ÏÚÏü†ü!ÿcŸþ„kèûLýÑz³ôÿq^¬+›þK%Ÿý‹óÿéD5ØW7ü–K?ûçÿÒˆk×=ã°¢Š(¯?øYÿ3¯ýwßû%zyÿÂÏùìk¾ÿÙ(Ð(¢Šóÿ‚_òHt/ûxÿÒ‰+Ð+Ïþ É!пíãÿJ$¯@ ¼ÿâŸüÉ_ö5ØÿìõèçÿÿæJÿ±®Çÿg @¢Š(ÏÿæáîTÿÛºô óÿù¸_û•?öî½€ (¢€<ÿágüοö5ßì•èçÿ ?æuÿ±®ûÿd¯@ Š( ¾'Ò|G¯XiÛYj^MºnÙŒ±'’3Ôšûb¾³ÿDü{œ?–á3L¨âà§ìûÝ+þ,åÅÓ…H%8¦¯Õ'ù›ßð—ø£þƒ?ù+ÿGü%þ(ÿ ÏþJÅÿÄÖEõÿêvEÿ@ÑüÌóþ©‡ÿŸqÿÀWùµoë×údÖ׺—nûwÇä"ç ä õ¾Ø¯…¯?ãÑÿç_t×Èq[„˱1£„‚„\S²ïv¯ø#ÐÂS…85¤¯Ñ%ùñOþd¯ûìözÏø‘i¨ß|Gð ¶“ªeß?ö—yöuŸËÄ(OÈÜ€G=3žÕ¡ñOþd¯ûìöz<[ÿ%{á×ýÄÿôkÃ:ƒþ/ˆôSÿòoþ4Â%ñþŠþP-ÿƽŠóÿøD¾!ÿÑOÿÊ¿øÖÃ{MFÇâ?­µmSûRù?³¼Ëϳ¬far>Eà`8ëŒ÷¯P¯?ð—ü•÷ ÿÒv ágüοö5ßì•èçÿ ?æuÿ±®ûÿd¯@ ø[áß_|8ÒntŸÿeØ¿åÙÿdC?—‰œŽNH'ž™Çjì?áø‡ÿE?ÿ(ÿãGÁ/ù$:ý¼éD•èçÿð‰|Cÿ¢Ÿÿ” ñ®?â—‡|ecðãV¹Õ¼wý©bžO™gý‘ ff@>u9$:ãëÜ+Ïþ6ÿÉ!×íßÿJ# @¢Š(Ëþ$Zj7ßüm¤êŸÙwÏý£åÞ}gòñ ò7 ÏLçµhÂ%ñþŠþP-ÿÆÿÉ^øuÿq?ý'Zô óÿøD¾!ÿÑOÿÊ¿øÑÿ—Ä?ú)ÿù@·ÿô (ËþÚj6?ü}m«jŸÚ—Éýæ^}`ó3 ‘ò/Ç\g½z…y߆g†×â·Ä«‹‰c†“Ny$‘‚ª(·bI'€ç5è”WÄúOˆõë 2k-KÉ·MÛ#ò±–$òFz“_fYêÚn¡quoe¨Z\Ïhû.c†ev…²F•9R0}¥|Egÿ‰øÿ:÷8-Âf8™QÅÁN*-Ù÷ºWüYË‹§ JqM_ªOó7¿á/ñGýòV/þ&øKüQÿAŸü•‹ÿ‰¬Š+ëÿÔì‹þ£øÿ™çýSÿ>ãÿ€¯ò:=]Öu?øZ Fÿí®µhá<”L7˜r v&¾»Žxfy’)cw…öJªÀ”m¡°Þ‡k)ÁìAï_xSþG¿ Øb×ÿF­}‘kN«â©4;;énî5Xeýü/6Ò/t;ï±Ý>,M'’’e ŠHÂ:¨çÚµ¼K¹¨=•æ¹g©Aq–£-c¡EtK:Û…F‹}ÊaÇš2^<€W* '†øÎ—ÜxJ;Ë8,®—Gk[pp¸+¹@Pr àu®|ª…:øÚtª+ŽMj6¢Ú0ÿámüCÿ¡›ÿ$-ÿøŠ?ámüCÿ¡›ÿ$-ÿøŠã¨¯ÑÕ¼¯þ}äÒÿ3“ÛO¹ØÿÂÛø‡ÿC7þH[ÿñëîî5ëw·’ù·WäòÍ&лݣˆ±Ààd“À¯œëèoÙßþDMSþÃÿ訫渗+Â`©S–­·}[éæÙµÊMÜõÊ(¢¾<è"x>óÂ>ÔµËxÊK«_+bO«‡t¨‡ 
(=÷¯‡âjò­‹Œ¥_uu¿VcSSýMÞ§QñOþd¯ûìöz<[ÿ%{á×ýÄÿôhø§ÿ2Wýv?û=güHÒ¿¶þ#øNû}õ‡ý£þ“a7•2bo•°qœ`û_:fz…çÿð«?ê}ñÏþ?û ?áVÔûãŸüöèçþÿ’½ñþáŸúNÔ¬ÿ©÷Ç?ø8ÿì+?ᾕý‰ñÇÚwÛï¯üŸìïô›ù¼ÙŸ0»|ÍœgØ ÐøYÿ3¯ýwßû%zyÿÂÏùìk¾ÿÙ+Ð(Ïþ É!пíãÿJ$¯@¯ø[àí¿‡N£ÿ oŠì<ï;ýÃRò¡LLëò®ÓŒã'Üšì?áVÔûãŸüöèçÿäë¿öïÿ¥Ñÿ ³þ§ßÿàãÿ°®?â—€?±>jÚü%¾+¿ò|ŸôkýKÍ…ó2/Ì»Fqœp(Ü(¢ŠóÿÿÉ^øuÿq?ý'Zô òÿ‰WößÄéßo¾°ó¿´Òl&ò¦LBò¶3ŒbkCþgýO¾9ÿÁÇÿa@Eyÿü*ÏúŸ|sÿƒþÂøUŸõ>øçÿý…s÷¶/©xƒã-¤QÏ,Òiöb8àfî-ª¼œßâÎ ‘]E†¡âïíh`ÓçþÆóQ[‹ó>Ïw’ûÌÝ·’Z=ùä¦~ZÇøo¥b|Gñööûëÿ'û;ý&þo6gÌ.ß3`gÀö³ü5á«Ïê>+¼¼ñŠìþËâ »8a±ÔÌq¬jAPƒŒn#Ž0¡ðò;95m" ?Mò¤ÐtI4½Vå ‹xd…šÜyŒ¬“1+•ýærK×ÌÖñèŸó¯fOÙï^—ÅWwïâxí ûCÏkx¬óÝ–ß”g8@– ÷‡r<»FÐ-ït¨nâé÷eQÀ G§µ} ו\¥ó{¯­º£Jx:˜·ìéîµ(Ñ[Ÿð‹ZÏÝïýüáGü"ÖŸó÷{ÿøWÜiVÿŸ_ù26ÿWñ~_yCC†âãÅZ 6w_dº“R·Hn<±'”åÀWÚxl¸¯¥áø‡ÿE?ÿ(ÿã^ ¢h°iÞ4ð´ÑÏq#6µh¸‘Áëôö¯®ëàx†´ªã\¥h´Ü¦xY{:›ž7ã_…¾<ñN ÇŽ-5DŽáfÝiÉhªB°Ü0ÄŸ›Æ9'< ñÿø+\ðLún¬j‘܆F·ŽÞGxáMÀ7Œ±b@ù9¯±+Ë|sàÍ?Ç?4Ý3R¸¼‚´y®­U‹ £\ÊF0Ç·¥yØDpøˆV’ºO¦ŒÊJêÇÍôWÐßðÎþÿ ¿ˆ?ð&/þ7Gü3¿…è/âü ‹ÿ×Úÿ®4?çÓûÑÍõwÜù澆ýÿäDÕ?ì1/þŠŠøg ÿÐ_Äøÿ­‚ÚtZ>â}2‘á³ñݼm!™QcPI àsÅxyæyO2§B YßSZTœétQE|Ù±çÿ¿äè_öñÿ¥W WȸÖ#ðýªÚø—\³„oÛ­óÇ|ç8QÀÉÉú“Zßkñý>&ÿÁœ•ëÒÉ1u ªE+5}ûž•<«8)Å+=w>©¯?ø§ÿ2Wýv?û=x·ÚüAÿC‰¿ðg%%¤ú¤¾,ð¢ÞëúÆ¡öå¡^Þ4¨™÷€=úŒûššù6*7RiYyŠ®Wˆ¥9%eæ}UEW”yÇŸÿÍÂÿÜ©ÿ·uèäþ$Ð?á#øëŸö¾«¥ìðЗÎÓ.|‰$m-ƒ•ç8õÒ¶?áVÔûãŸüöèWŸÿ¬ÿ©÷Ç?ø8ÿì(ÿ…YÿSïŽðqÿØPð³þg_ûï¿öJô 󃶿aÓ¼Ygö‰î<ÞEç\>ù$Ú#»±ÆIîkÒ(¢Š(¯<5ÿ"ý¯üÿC5õý|qáûû8t;x廂7²¯ |ǵ{Ü=8Ã''ouþhõòYF8†äí§êê*§ö¦Ÿÿ?Ö¿÷ùÆíM?þ­ïòÿ}·¥üËïGÓûj̾ò§‰ä_ºÿ€èb½ÿãoü’wþÝÿô¢:ùãÄöshwÅwŽváR@Iù‡júãoü’wþÝÿô¢:øî!œgŠ‹‹¿º¿6|Æu(Ëœ]ôýX|Sÿ™+þÆ»ýžÿÉ^øuÿq?ý'Z>)ÿÌ•ÿc]þÏUüwg¦|Rø{yw¥¬Ú[æžA.`P2Ç’@ükÁ<ƒÒ(®þ¿ÿÐסÿàÆþ*øNüÿC^‡ÿƒøªè+Ïü%ÿ%{â/ýÃ?ô« ÿ„ïÁÿô5èø1‡ÿŠ®_À—özŸÅ/ˆW–p]ÚÉý›²h$#b 88 €,|,ÿ™×þÆ»ïý’½¼ÿágüοö5ßì•èçÿ¿äè_öñÿ¥W W“ü ñg†ôÏ…º5ÿˆ4«K¨üýðÏ{n¹žB2¤ädî?á;ðý zþ aÿ⨠¯?øÛÿ$‡]ÿ·ý(ŽºøNüÿC^‡ÿƒøªáþ/ø³ÃzŸÂÝfÎÃÄUÝÔžFÈ`½ŽGlO8Prp?…zÅQ@âßù+ß¿î'ÿ¤ë^^oã»û=3â—ÃÛËû¸--cþÒß4òÑs–< ’ã]Gü'~ÿ¡¯CÿÁŒ?üUtW?ÿ ߃ÿèkÐÿðcÿGü'~ÿ¡¯CÿÁŒ?üUsþÿ’½ñþáŸúNÔ|,ÿ™×þÆ»ïý’«øþÏSø¥ñ òÂî »Y?³vM‚Dl@Àá‡øU…Ÿó:ÿØ×}ÿ²P WÈÿ‘~×þÿ¡šúþ¾8ðýýœ:¼rÝÁÙW>cÚ½îœaŠ““·ºÿ4zù,£CrvÓõFõSûSOÿŸë_ûü¿ãGö¦Ÿÿ?Ö¿÷ùƾÇÛÒþe÷£éýµ?æ_ynÓþFÏ 
ØrÓÿFWÕ5ò]®©§¯‰ü5+_Zˆ¡Öme•ÌË„@ù,Ç<ÜšúSþ¿ÿÐסÿàÆþ*¾'=”eŒn.ú#å3y)b[O¢: ãæÿ’ÉgÿbüÿúQ jAãO Ý\Eooâ]iåp‘ÇüLÎÄàd’xÅsúÆ­¦èß,n5MBÒÆЧE’êe‰KyñÄ àc^1æåÏÿÂwàÿúô?üÃÿÅQÿ ߃ÿèkÐÿðcÿ@y_‚¼C¦øjÃÆ7º¥ÌpA'Œ®mÕÕ~gx×?1ÇÑUjí?á;ðý zþ aÿ⫃ð]´Úï‡|`Ú<–—|d÷3ÌV)V9à›ïª· ©Áõ땦x’ÏV×5m*Ú9üÍ7Ëó&eT»÷ݰ'vÖѸe#¨5ÃÙ7GÖ_|IÒ’H|Ȥ‹ûFãÌö²ýÙï%r“œ¿³‡C·Ž[¸#q»*òGÌ{VŸö¦Ÿÿ?Ö¿÷ùÆ¿EÀÖ¤°´Ó’øW_$}¶­5‡‚r[.¾Eº-?älð§ý‡-?ôeTþÔÓÿçú×þÿ/øÔšuå­Ç‹ü*\Ã+ rЕIyƒÒ²Í+S–¢R[w3ÇÕƒÃM&¶>°¢Š+óóãO?ÿ›…ÿ¹SÿnëÐ+Ìõ-[MѾ>­Æ©¨ZXÀÞ²]L±)oµ€Xœqìk¬ÿ„ïÁÿô5èø1‡ÿŠ ‚Šçÿá;ðý zþ aÿâ¨ÿ„ïÁÿô5èø1‡ÿŠ ágüοö5ßì•èç '†ê߯òÇ4ø¢õã’6 ®¤!ÁsšôJ(¢Š+ç?‡Ú{àm:âëI°žwówI-²36%p2HÏ@+èÊð?†ŸòO´¿ûkÿ£^ºðI:ŽýŽì½'Uß·ùßð‹ø{þ€ZgþGþÂ/áïúiŸø øVµéòG±ìrG±ÁüAÐtk/j7ºM„§•¶H­‘Ys*‚z^™ñ·þI»ÿnÿúQpÿäŸjŸöËÿF¥wŸäë¿öïÿ¥×™ITVìxù‚Jª·oóŠó%Ø×cÿ³ÕØYê¾ÙßÚAwk'ö–øgŒHˆŒ©àà€ ±ñOþd¯ûìöz<[ÿ%{á×ýÄÿôká:øA<ÿB¦‡ÿ‚èøš?áðý šþ ¡ÿâk ¢€9ÿøA<ÿB¦‡ÿ‚èøšåü ag¦|Rø…gai¥¬Ù»!‚1.`bp£’IükÒ+Ïü%ÿ%{â/ýÃ?ô¨øYÿ3¯ýwßû%zyÿÂÏùìk¾ÿÙ+Ð(ÉþxOÃzŸÂÝòÿÃúUÝÔž~ù§²ŽGlO b2p…wð‚x?þ…MÿÐÿñ5Ïüÿ’C¡ÛÇþ”I^@ÿü žÿ¡SCÿÁt?üMpÿü'á½3án³yaáý*Òê?#dÐYG®gŒ0ƽb¼ÿãoü’wþÝÿô¢:ô (¢€<ßÇvzŸÅ/‡¶wö]ÚÉý¥¾ã#b#*x8 ºøA<ÿB¦‡ÿ‚èøšçü[ÿ%{á×ýÄÿôkÐ(Ÿÿ„Áÿô*hø.‡ÿ‰£þOÿЩ¡ÿàºþ&º (Íü ag¦|Rø…gai¥¬Ù»!‚1.`bp£’IüjÇÂÏùìk¾ÿÙ(ð—ü•÷ ÿÒv£ágüοö5ßì”èóŸÃíF½ð6qu¤ØO;ù»¤–Ù›¸$g ôexÃOù'Ú_ýµÿѯ]x$G~Çv^“ªïÛüoøEü=ÿ@-3ÿ#ÿ ?áð÷ý´Ïüü+ZŠôù#Øö9#ØãÒÿí¯þzë+Ú¥û8éÑCBöQÓ¢2áð÷ý´Ïüü+[Ñ´½;[ð„Ö:m¬§ÄVJ^Äã tà~U××/ã+gqák–ŽYD> ´Ç vÁc…QÉ'°jkÆ*›ÐŒL"¨ÊÈö+mv;ŸêÛʯemæfG þa`¸89ä–Q«'Æž5{#µ´žâñ&‘~Ù¨GeX”<ÇÎ]™ÑU@îI*5Æø‚m3Äzìš…Þâ•‰í¡ƒÉÿ„\LCFÒ°lÍ€dJFƒÀçµ`ø£ÅÚW†l4»H Ötôûíˆû†£ "Lñ3‘è#È*8 Àçæ<øç‚ixÊÒ×WøÅh5:7Sá¤ÛÝÆ’Ûí ÁÁeÜ2FA#® ©?áð÷ý´Ïüü+°ñ†…©xãL—G‡Uk ¦š¶ÆI”Ç/„[¸+·‘ÆOjô(¤Y¡IT0WPÀ:lPpAö<ש„Qtõ=œ ‹¥®ægü"þÿ ™ÿ€‘ÿ…ð‹ø{þ€ZgþGþ­EurG±ÛÉÄŸ"Ž ÄÐÃÇ~"ºTDUc: ôÊó_ƒ?ò ñOýŒ—ú uéUáÏâgÍÏâaET’ó—„/µ}ÂözeׂüVóÁ¿sE¥¹S¹Ù†3ƒÐŽÕôm¥:’¦ïZU¥J\Ñ<7þ{ÿú,Gyýá«Ë=6ûPûˆ-o&†Æ,žZ,BÀs’9£þŸýH^9ÿÁ?ÿgY™ü-?ú¼sÿ‚þÎøZõ!xçÿÿýð´ÿêBñÏþ ÿû:?áiÿÔ…ãŸüÿötÂÓÿ© Ç?ø'ÿìê¿Ãɯ5?ø×\›EÕt»[ÿ°ý5+S·—£px8#±=G­Xÿ…§ÿRŽðOÿÙÑÿ Oþ¤/ÿàŸÿ³ ágüοö5ßì•èçÿ ã¼þÎñ-åæ›}§ý¿ÄWÃ}ŠO-Â%Oâ8ÈÈ<× P‹ü;ñ…ç„| ¦èwþñ”—V¾n÷ƒI%é]Æ `z0í]Gü-?ú¼sÿ‚þνŠóÿøZõ!xçÿÿýrÿÌëÿc]÷þÉ^^ð³þg_ûï¿öJô ùËÂÚ¾á{=2ëÁ~+yàß¹¢ÒÜ©ÜìÃÁèGjú6ŠÒISw‰­*Ò¥.hžÿ =ÿý 
0ÿÁSð“ßÿБãü7ø×¹Q[}r©ÑõúÇ€\Üêºþ½áˆ¢ð—‰mVÛ\´¹–k½5’4[–çÎyã׿ÑEaR£¨ù¤sU«*’æ–çŸüSÿ™+þÆ»ýž›âRçÿì5 kz¥©Ò%µ?ÙvfrŽÓFÃw ÷ô§|XŽóû;ÃW–zmö¡öZÞM ŒY<´X…€ç$sGü-?ú¼sÿ‚þΠÌ?áiÿÔ…ãŸüÿötÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÑÿ Oþ¤/ÿàŸÿ³ þŸýH^9ÿÁ?ÿgGÂxï?³¼Kyy¦ßiÿoñÕä0ß@b“Ëp…ISøŽ224ÂÓÿ© Ç?ø'ÿìèÿ…§ÿRŽðOÿÙÐð³þg_ûï¿öJô óÿ„ñÞgx–òóM¾Óþßâ «Èa¾€Å'–á ’§ñddkÐ(ç/_jú…ìô˯ø­çƒ~æ‹Kr§s³ g¡«oþ{ÿú”š¶DÑôòÑÇ!¸w# …§¿Ìz¯ÿ =ÿý 0ÿÁS{•½d88 ŽÝ:WqEQEQEQEQEQEQEQEQEQEQEQEQEQEQEÿÙhpcc-1.4.1/hpl/www/spreadM.jpg0000644000000000000000000004171011256503657013042 00000000000000ÿØÿàJFIFÿþŒImage generated by GNU Ghostscript (device=ppmraw) CREATOR: XV Version 3.10a Rev: 12/29/94 (PNG patch 1.2) Quality = 75, Smoothing = 0 ÿÛC    $.' ",#(7),01444'9=82<.342ÿÛC  2!!22222222222222222222222222222222222222222222222222ÿÀÍ "ÿÄ ÿĵ}!1AQa"q2‘¡#B±ÁRÑð$3br‚ %&'()*456789:CDEFGHIJSTUVWXYZcdefghijstuvwxyzƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚáâãäåæçèéêñòóôõö÷øùúÿÄ ÿĵw!1AQaq"2B‘¡±Á #3RðbrÑ $4á%ñ&'()*56789:CDEFGHIJSTUVWXYZcdefghijstuvwxyz‚ƒ„…†‡ˆ‰Š’“”•–—˜™š¢£¤¥¦§¨©ª²³´µ¶·¸¹ºÂÃÄÅÆÇÈÉÊÒÓÔÕÖרÙÚâãäåæçèéêòóôõö÷øùúÿÚ ?÷ú(Í„FGõéMsŒ“Ðu¯³»Õ¢›EñD~,Ô§ŸRñoö}Æœ.‹ÚÛÂòH¦?,–çhg¨Þ³š)ëK@Qžq@&FqÞ–¹/‰êçÁ¥Æ¬¾—5¥¼·IAÚXÖ&&0s”$ãçŒPZzQ\ÿ?äžøkþÁV¿ú)k È Š(Í™´Ó×½.áëK^7c¥k)ã >=ÆZþµm©1Öæ•Êi±F9’ ¸+æ‘"¨E-´‚HL {àué@ EPEçPdR×ñ HÔµm66Òoue¼¶Réc¦j)e%ÖY-#)¨,qŽIúU†—Ój_4;»NMJi-þ{™« ASŸ¼W7ÝÝè«¢ŒŠ(¢ŒÑ@&áëMn§Ž1Ô×ÏÖÞ9Ó|Añ£Ãš³kI½º´·µÛ!Kxš1-ƒùårìzà¸BQMRœþ´ê(¢ŒóŠ)2=ik‚ø«aâ-SDÓ,<1¨=–¡>§RóìçËòä-ÎàX * œ.pq@æE/Q^sðvâö_]Á«]ê7:­¦¥=­é¾ŸÎÙ*m#lŸ.Þÿ{v8Åz(#ÿÀHÿøåV¿øÕá}OO¹°¼ðïˆäµº‰¡™>ʃr0!†D¹ô¯,ü(ü+—ë±ìÿdGùT°ø×á}3N¶°´ðïˆã¶¶‰a‰>ʇj(À2äð^k­ð_Ä 3Ç3j)§YêÏcåy«y!;÷mÀVoîž¾µó÷á^‰ð#þCž0ÿ·/ýZÒW7c“Ž IßSÛJiëNíIÜVçšy¾Ÿñ†ÓR±ŽîÃÁ¾1º¶“;&·ÒÖHÛƒ†ƒ‚ü*sñDžžñÀÿ¸?ÿgNø%ÿ$‡Bÿ·ý%z(Âíãðe¶±¬Ÿ ?äCñÇþ ÿû:<]ÿ%wáàãûKÿIÖ½P‘x—_Ð<^–ë®ü2ñµØ·,bΖèP°ÁÁIçò•¥¥xúÇCÒíôÍ7áÏm¬í×dQG£ð£þûÉ$ä’y$’I'5阣ÊøOÆöž-¾Õ-!Òõ]:çNò¼øu(˜¯Ê =<ãï ê‡Jóÿ ÿÉ^ø‹ÿpÏýÕ訧­yÆŸñ†ÓR±ŽîÃÁ¾1º¶“;&·ÒÖHÛƒ†ƒ‚ü+Ò;Šà> É!пíãÿGÉ@ ÿ GœÿÂã§ö?ÿgY×~6Óïµm;T¸øwㆽӼϲÈ4¦_/ÌP¯À“ ¹éÆ+Ô±F(Ïcø«iý¡§ÙÝø[Å:Û†{íJ†Aæ+ò‚OEÏ8ûÕÕâ¼ÿÂòW¾"ÿÜ3ÿD5zéM=iÝ©;Šó}?ã ¦¥c݇ƒ|cum&vMo¥¬‘¶  
øUƒñG#þD?àŸÿ³¥ø%ÿ$‡Bÿ·ý%z(Æ4½GÂú6¿.»§ü,ñ”:œ¥Ù¦þËvÚ\å¶«HUsÈù@à0+¥â­§ö†Ÿiyá_iÿoºŽÒ¯´ñ~c¶$¿ãÆO½ ç߯ƒ'ÅV?û=zýÑšZAÒ–€8ß|A´ðCþÅÖõ;×µ{4ËQ>ØË”É+éŽEQÿ…§ÿRŽýÁÿû:á ñÿR®òj»ð8çåúçôÿééZ¯Ã¿\YO·Ì‹û-“vÖ 9Yê ðkA~)aF|ãŒ÷ÿ‰Gÿg^ƒŠk öÍsþñe§Œtû»Ë;;ëO²Ý=¤°ÞƱȲ(RÀ€N>ðóÅtC¥yÿÂÏù?ìj¾ÿÙ+Ð(;Šø÷Ãÿò¶ú7þ„kì.â¾1ÑN¢4˜?fòðvï ž§®+ C\º»®OÊ»P‹nÝ>FýŸ»Võ³ü›ühÝ«cþ\ÿ&®>hÿ1ôŽ–#­9}Ã|@?âGsô_ý kì!_kÚ'J›Îû/—Æí·u¿…}œ¸ÅvaÚqÐù¼Þ2uϺ‹^)ñßsÂKßý*ö¬Ö¼Gã÷›ý­á#Ï0 ÌoéÒ.¿†kYü,á¦ëÁ.èóãÖŠÏά ÿ/ɨݫÓŸþ?^w4™b©b-ü9}Æ…z'Àž5Ïgþœ¿ôkÊ÷jýþÅøn®Çáäú•·†>&\Ár–×±iðȳFåÛÇåÁ6:6~SƒƒŒVøvœ´g•›Âª œàÒ¿SèìÅ&ày¸¸¾!Xb=àF÷Þx³’X﬑ZMÛ “ ôL0Meè·‚ÛÅzq¹ºþЇX»¾m>úÓÄOgpnH@«É”Þgƒ]‡Î¾ ¿t<çþ^?ô|•èÍ|Ãà߉*Ð|%c¦i¶ú;ZÁæÚæ)KüÎÌrUÀêOn•½ÿ ‹Çóéáßûó?ÿ¬ziٳѧ”cªEN4ÛOSè óÿ¼ü"×?í‡þ޼ûþŽçÓÿ÷æþ9X^4ø‘â¯øJûLÔíôd´˜'˜ÖÑJ$ù]Xcs‘ÔÃ4•zmÙ0©”ãiÅÎtÚHúzŠAJ+cÎ<ÿÅßòW~ÿÜOÿIÖ½¼¿âGö¿ü,o`ý‡ûOþ&>OÛ÷ù?êSvíŸ7ÝÎ1ß¡‹ÿõ#ÿäÝzçÿñwÿêGÿɺ?âïÿÔÿ“txOþJ÷Ä_û†膯@í^_ðßûcþ7¿·¾Ãý§ÿï;ìüŸõO·nÿ›îíÎ{æ½Cµ'q\Á/ù$:ý¼èù+¿î+Ä>ÂÇÿ…s¤ÿ`ÿÂ+ý™ûï'íÿhó¿×>íÛ>_½»íŠ÷ +ÏÿâïÿÔÿ“tÅßÿ©ÿ&è>*?á Ïý V?û=z¯ñ‡ü'Ú>ÿ„£þß±ÂIeå/Ïó<̶3æq·³ß8¯g(kÏü]ÿ%wáßýÄÿôkÐ+Ëþ$kÿÂÆðöØ´ÿâcäý¿“þ¥7nÙó}Üãñ@¡Eyþ>/ÿÔÿ“tÅßÿ©ÿ&èÐ+Ïü'ÿ%{â/ýÃ?ôCQÿþ¤ü›¬ÿ†ÿÛð±¼}ý½öí?ø—yß`ßäÿª}»wüßwnsß4ê©;Š^ÔÅp¿äè_öñÿ£ä¯@¯ø]ÿ þΓýƒÿ¯ög?·ý£Îÿ\û·lù~öìc¶+°ÿ‹¿ÿR?þMРWŸ|T Ÿú¬öz_ø»ÿõ#ÿäÝrþ0ÿ„ïûGÁÿð”Â;ö/øI,¼³¥ùþg™–Æ|Î6ãv{çíŠAÒ–€<üÿÉÂÜ©ÿ·Uè¯(ñ'ü$gãœcÃÙ_nÿ„hoþÓó<¿/í'8òùݸöÍkÿÅßÿ©ÿ&èÐ)23\ü]ÿú‘ÿòn“»ÿäÝ/¿ù?ìj¾ÿÙ+Ð+Íþ}°éþ+þÐò>Ûÿ -ßÚ>ÏŸ/ÌÛ훹ۜã<â½"€¸¯4ù[}þ„kì>â¾<Ðä mô?ú®ðשõ<%þû/ð¿Í4QK^9ú+fv¹ÿ k“ŽÃÿBôñÇü ÿÚëçÍw?Ø·Ô ßB¾Â^œW³€þõ?9âß÷Øÿ‡õg/lì{÷ùŸðcß5ômx§Çoù øHû^ÿè1WM])ËÐðòøóbé/ï/Ìó~Ô>NqóïÏ_lTYÔ³ÿ.¸ï÷ªáÎM%xJ¥–ÈýVX>wwR_yÁº!~ÎaÏ;¼Ìþ˜®»á"»×µû{8´I´ùZÑ5Xï–F/ 1T¸!øéÛ5ËW¤|ÿ׋þ–_ÊZìÀJójÝœâº*8u>g¬–—ÓfvÚ¿Ä[ö·ðÔ^‡F‰-bºYÕÑB‚å€lW”Ûø+â­Ÿˆ¼Aâ4i¶z³ÎðI Š.Š>胦Ÿtî$1*Û¾lçè¾Õ¾½SàOŽô?û:ÞY‡ ŒgvrzÖ¶k7Bÿ-¿Ñ¿ô#Z8>•óø‡z’õ?]Ê#Ë‚¥­ýÔÿÍgëßò¹ú/þ„+CÕŸ¯ø’\ñØèBŠÅ©y¯û•_ð¿Èû¡¥µ&sô :×Ð3ñó€ñoü•߇÷ÿÒu¯A¯>ñoü•χ„ÿÔKðýÂ×~:P© F)Z=@à|'ÿ%wâ/ýÃ?ôCW v¯>ðŸü•߈Ÿ÷ ÿÑ ^‚:PwÀ|ÿ’C¡ÛÇþ’»þ⸂_òHt/ûxÿÑòP QEçÿÿæJÿ±®Çÿg¯@¯?ø¦yðWýV?û=zé@yÿ‹¿ä®ü;ÿ¸Ÿþ“­zyÿ‹¿ä®ü;ÿ¸—þ“­zfŠ+Ïü'ÿ%{â/ýÃ?ôCW WŸøOþJ÷Ä_û†膠@íIÜRö¤î(€ø%ÿ$‡Bÿ·ý%zyÿÁ/ù$:ý¼èù+Ð(¯?ø§ÿ2Wýv?û=zyÿÅ3Ï‚¿ìj±ÿÙèÐ( 
t¢€<üÿÉÂÿÜ©ÿ·uèççþN=¿áÿÛªô ÐE„Zà>ÿÌéÿcU÷þÉ^^ð³þgOû¯¿öJô Nâ¾Sð—‚ïµ YßE®ýš9Câ²Û‡a×pÏLþ5õgq^ ðÛþD /é/þzñ³ÌEJe:{ÝtO£îzyS’¬ù[Ztm~FGü+Kþ†aÿ€ ÿÅÐ~ê_ô3ü_þ.½ŽõòÚØ®ëÿ_ä}5Oç—þÿÌò_ø.÷HðÅåìÚà¹Hön‹ì7eÔu }søWÕc¥x'ÄŸù5?ûeÿ£R½ðWÖ䘉â0îsÞ梁#ç³[ûevÞ[}_p¯øÕ§I«xŸÁ¶PÜýšI~݉|½øÂFzwÎ1ø×±×•üOÿ‘ÿÀ¿÷ÿÑI^†.N'%ºOò8°ßƇªüÏ?ÿ…s©dãÄØÿ·ÿâ©á\ê_ô3ü_þ.½Ñ_ólWuÿ€¯ò>¶õ?ž_ø¿ÌóïøW:—ý Ãÿÿ‹®¿à®.“âØÍuö©"ûfòönÊHzdã®? Ò¦ü1ÿ‘ÿÇ÷ÿÑO^ÖE­ˆÄJ5µ»%ÕvG™š¹º+šMëÕ·ùž©ÚŽi{RwõGÏŸ8ø á5ÿ‰ü§ëø£ìqÜy˜·þÏY6m‘—ïoÎÜôï]'ü(Oþ‡Qÿ‚¥ÿã•Õüÿ’C¡ÛÇþ’½³t©½\Q×ã²Iy³Å?áDjô;ü¯ÿ®oÇß ï¼1à­CX›Äâõ-ü¼Ãýž#ߺE_½¼ã³Óµ}^ñ·þI¹ÿl?ô¢:*iÝ$9f¹ÅÆUdÓóg}Ž:RÕt¿¶}Fk—ý*’ia¸Sž‡&7ü«3Zñ~‡ \ýŸP»uŸÈ{¦Žygd…xiF­µ~ó`uô­Æp? ¾6ðAÒõìûÑöÿ*çÉYv~î<ü­ÁÈÈç×5GÌø‚:xÿþÀöõ¡ñ"xn¼mà ‹yRh%K÷ŽHØ2º˜c ‚8 ŽsSµóÞe‰ÂÖŒ(Êɫ컾籗áiV¤å5ÔÉó~!Ñ@ÿÊ=½þ ŸùŸÿòoZÔWý»þÁ‘Üòì7ò•þÿhxà꺇ö…éû›sä¬[ÿvøùWÇ¥zÈé^YðÇþGÿÿÜ?ÿE=z é_q…›©F–í'÷£ç«EF¤¢¶LNâ¼Cáw‡|e}ðçI¹Ò¼wý—bþw—gý‘ Þ^&p~v99 Ÿlãµ{q\Á/ù$:ý¼èù+ È?áø‡ÿE;ÿ(6ÿüUð‰|Cÿ¢ÿ”þ*½Šñh~*Ó5Í­øÃûfÙ¼Id‰oý™¶×Ëû”’xcßÚ½œt®âŸüÉ_ö5ØÿìõèW—üH´Ôo¾#x ÛJÔÿ³/_ûGË»û:Ïåâ'äcƒç¦sÚ½B¼ÿÅßòW~ÿÜOÿIÖ€øD¾!ÿÑNÿÊ ¿ÿGü"_ÿè§åßÿН@¢€<ÿþ/ˆôS¿òƒoÿÅVÃ{MFÇâ7­µ]SûRù?³¼Ëϳ¬>fbr>Eà`=ñžõêçþÿ’½ñþáŸú!¨Ð;Rw½©;Šñ…Þñ•÷Ã&çJñßö]‹ùÞ]ŸöD3yx™ÁùØää‚}³ŽÕØÂ%ñþŠwþPmÿøª> É!пíãÿGÉ^@ÿ—Ä?ú)ßùA·ÿâ«—ñ†‡â­3Pð|Úߌ?¶m›Ä–H–ÿÙ‘[m|±¹I'€F=ý«Ú+Ïþ)ÿÌ•ÿc]þÏ@øéKEäþ$ÓµOãšC¡ë¿Ø×Ká°íqöD¸ÞŸi ¦×Àsíï[ð‰|Cÿ¢ÿ”þ*ƒÿ' ÿr§þÝ× PŸÿÂ%ñþŠwþPmÿøªCá/ˆ}þ&äØßÿНA¢€<ßàì7úŠá¼ºûUÔ~%»I®<°žkgÚ8\œœ™¯H¯?øWÿ3§ýWßû%z'q^ðÛþD /é/þz÷Îâ¼á¿Ã ëÞÓ5-KJóï'ù’}¢U݉]G ÀtµxVÉ>Ñ+į́§†b×{ÿ SáórÞçþ¿gÿâëíxzT¥…n“ms=Õº/6yxÚέKµmÿ<ãúW–|N;¼}àR:íý•—㇞ð–¡àûíJû-Ìž$²…¤ûD²e bFˆê£ò­?‰Ú=޹ãïiº”}¬âÿÌ{.í±#Arë^¦1ÅaªsmÊÿ#š”œjFIuhÍ7þ»èù7?ÿIÿ ‡À¿ôÿɹÿøºüÙÖÀ<¿ðÿÉïö„ÿ—ñÿ€>›ðÃþGÿÿÜ?ÿE='ü*ÿÐÿ&çÿâèø_£Øh>>ñÞ›¦AäYÃýŸåǽ›‰ØòÄž¤÷¯‡*a¥‰’£&ß/T—UæÎ É!пíãÿGÉ^^ð»áwƒ|GðçIÕµ]í³ùÞd¿i™7m™Ôp®àÒ»øR_è^ÿÉÛþ.€=¼ÿãoü’-wþØéDtÂ’ø{ÿB÷þNÜñuÇüQø]àß|:Õu]+Gû=ì/._´ÌûwJŠxg#£Ýè·Õ´PxËSÔ´[(íînì-j+,1É$¾b34R¶J˜å™Fá´W=âMGQðæ“ö‹èÓU•-†³¥jVëw«Ž&s º¬ëNã76HÝ]Å }KBÒ¬.ãó-nµ«(&MÄnF”Ž éƒïX"ø á}KOŽ N‹r²‡kŒËq¹0~]­&$õãÞ€25û«k­Wá¬vú†~ö¶÷–Ó˧Hš¶ñn pp8­Ü×ð‡@ðßÅfŠÀ L2Íœ=Ï\ögá‰ÉÐùÿ¯¹ÿøºøÞ#©††&*´š|½}_š=L)Ò¦â•õFiŸð¨| 
ÿ@?ü›Ÿÿ‹¤ÿ…Eàlñ¡äÜÿü]|÷·À<¿ðÿÉŸÚþOÇþ€/môÏ|C¿¼“˵¶ŠÊi_im¨°ÈXàdœÚ½o `g­x~…á;D»ø§á½O¢k+Tµ¶‘·5 ‘”åÏ]äIààö®ÞÃLñ|WöφÏIYP­ˆ½·LñŲ=åÚ%=0ø«ô¬‹ÃSqÛ•~G‡V\Ór}N—OñŸªßOkfnàyݬæH·Få B1 àž‡Ð×)ðLãá‡ÿoú>JÊðÿ‰´ý+ǵ†¥s‡þÌ®×öwº°’®e—ÌIm¼Ã”.͜ʠ®FG-ðãáÏ„õïéšž§¤‰îæy’›‰W8•Ôp¬E𬳠ž’«U6¯m?¤L äì}Í.kÊ¿áPøþ€_ù7?ÿGü*Ðà\ÿü]x¿ënùe÷/ó5ú¼_Šó%ûxªÄÿèuè¥xgˆü á¿ j¾½Ñ´ß²Ü?ˆ¬âgóä”±$a˜Ž ~UîC ¯s§¢«S½Ÿs)G•Ù‹^âïù+ßÿî'ÿ¤ë^^_ñ#DÓ¼EñÀzV«mö‹+í2-ì»¶ÂŒ9RåG~ÕØIꢼÿþŸÃÞþÿÉÛþ9Gü)/‡¿ô/äíÇÿ@^á?ù+ßîÿ¢øR_è^ÿÉÛþ.³þèšw‡>#xûIÒ­þÏeöw—ö}»¢v<±$òIë@¡Ú“¸¥íIÜPðKþI…ÿoú>Jô ðÿ…ß ¼â?‡:N­ªèÿh½ŸÎó%ûLÉ»lΣ…p•ØÂ’ø{ÿB÷þNÜñtèçßÈÿŠ/ÛÅV'ÿC¥ÿ…%ð÷þ…ïü¸ÿâë—ñ‡ÃÏ xGPð}þ‡¥}–æOY@Ò}¢WÊÄŒ3ÕG4í¥ƒ¥-yñ#þŸO ÿíÕzkÉüIá}Å¿’Ã[´ûU´~¤~k¦\•*AèÇŠØÿ…'ð÷þ…ïüŸÿ‹ @¤$Wÿ Káïý ßù;qÿÅÒ‚»xxàdÿü] ÿætÿ±ªûÿd¯A¯7ø;eo¦éþ+°´Ë¶µñ-Ü0¦âÛQDaFO'€:פPwå?ä–h¿I¿ôt•êÅxȾм¦iº–¨`»‡Íó#û4­ŒÊì9U#¡ëç8¢…Zø8Æ”\Ÿ2Ù_£5£$¥©ì†¸¯ø[~ÿ ÙÿÀ9ÿøŠOø[~ÿmüŸÿˆ¯Y^;þ|ËîgW´‡t'ÅÑÿ»Y©ÿèèëÕExÄoˆ¾×<©éÚn¨g»›Êòãû4«œJŒyepz÷µ9÷Ü1B­ £V./™î­Ñµ¤¥+£€ø§ÿ2Wýv?û=Cãù*>=±¨ÿè•©¾)ÿÌ•ÿc]þÏYu‹ ÇÞÔu)¼›H´<É63c1"Žy#µ{ø¹á*Æ*íÅþFqvi·–¸ø[žÿ ÙÏýzOÿÄRÿÂÛð8ÿ˜ÑÿÀIÿøŠü©åxïùó/¹ÞÒÎÚ¹_ÿÉRøƒÿpßýÕOþßÿè6ðþ"“ᆯc¯xûÇz–›?Ÿi7ö—&Æ\â'SÃzƒÚ¾“…°Xš¹Ê¬W/T×TcZqqÑž©Ú“¸¥íIÜWÞÇðKþI…ÿoú>Jô ñ…¿¼áχN“«k?g¾ƒÎó"û,Ï·tÎÕBµ×ÿÂíøyÿCþI\ñºô óÿ¿òHµÏûaÿ¥Ñÿ ·áçý ?ù%qÿÆëø£ñCÁÞ$øs«iZN±ö‹ÙÄ>\_f™7m™òÈáOzïþ!Ç–ÿcŸÿ£–ºúãþ!Ç–ÿcŸÿ£Ö»Ô˜uãù*_þš—þ‰ZêkˆøŸ«Øè^=ð.¥©OäZ@52MŒÛs(áA=HíIÿ {À¿ôø ?ÿ_ Å8,M|T%F›’åè›êΚJ:Å&+ˆÿ…¿à_úüŸÿˆ£þ÷?è:?ðþ"¾gû+ÿ>e÷3~x÷3´ïx{Ƶӓøzæ^ÿÉËþ.²¾k÷¼u©i—}œßÙþ\›wb'SÃzƒÚ½Pt¯Õ°”0´£%f¢¿$pÊÜÎÇÜ~Ï>¸Õï.>×}mbû>ÍgjÀy8\6^MåòF{c$sÅ3àÆ“cià+MNÞ·wá¾Ó.ö&M’È«Á8‚½‡¸¯øoñÂz€tÍ3RÕ„y¾dg•¶æWaÊ©ï^OÓ¯S¡A6ܕҾÖ}ºliE¥-Oa)k‡ÿ…¿à_úüŸÿˆ£þ÷?è:?ðþ"¾eXïùó/ü\ñîIñýƒìg²þm^ :W†ø“Ç^ñ>©á=RûUÂxŽÊVO"D†#9e©W¹•ú/Q©GT‹‹»ÑúœuZrº¼ÿÅßòW~ŸOí/ý'Zô òÿˆúΟáÿˆþÕ59Ìphy’lgÆèQG <‘ÐW¶fz~E.k€?>ùŽŸü¸ÿãtÀÿóà Çÿ¥q\ïó^á?ù+ßîÿ¢ÿ §áÿý›ÿn?øÝfü8Ö´ÿ|Gñö«¥\}¢Êìï.]Œ›¶Äêx`äȦ;ž¡Ú“¸¥íIÜPðKþI…ÿoú>Jô ñ…¿¼áχN“«k?g¾ƒÎó"û,Ï·tÎÕBµ×ÿÂíøyÿCþI\ñºô óÿŠó%Ø×cÿ³Ñÿ ·áçý ?ù%qÿÆë˜ñ‡Ä? 
x»Pðuއª}®æ?ÙLéöyS åÔ¬8ë@ÏEPŸŸù8_û•?öî½¼£Ä~(Ñü%ñÑ5 nó춯ᡠÉå<™srH@OE<ô­ø]¿?èaÿÉ+þ7@Eyÿü.߇Ÿô0ÿä•Çÿ¥ÿ…Ùð÷ÿ„ƒúò¸ÿãtŸ ÿætÿ±ªûÿd¯@¯7ø={o¨Øx²úÒO2ÚçÄ·sDûJîFpyÁæ½"€¸¯œüÿ"NôýÕôgq_9ø þD;èÿú1«ÄÏŸû2õýó¼LÚÂFßÌ¿&t”QE|ßsáy™Îxó?ð…êDçÓçZú-F|éą̃Œã„ÿÐÖ¾‹Zúü‰·†wïú#î¸e·„wþgù#€ø§ÿ2Wýv?û=c|WñZø+Ž×ßÉlüSçþ¯û¬özÇø­ÿ#¯‚¹íÿ¢ã¯Kþï?Gù¾?ýÖ£þëüŒîh¢Šüû™÷?.æ}´>Èë㥇þ%gÖ‡Âù|oô°ÿÐ$¯s íôýQô|3&ñR¿òþ¨õžÔÅ/jNâ¾´û“€ø&?âÐè_öñÿ£ä®ÿÀ|ÿ’C¡ÛÇþ’½€À|lãá¹éûýzyÿÆßù$Zçý°ÿÒˆèÿÄ/øñÐ?ì`Óÿôz×`+ø…ÿ:ýŒþŽZìGJóïÿÉ\øyÿq/ýµèWŸø»þJïÿû‰ÿé:× ÐbŒRÑ@}á?ù+¿?îÿ¢½t®ÂòW¾"ÿÜ3ÿD5zjN⸂cþ-…ÿoú>JïûŠà> É!пíãÿGÉ@þ(Å-ç߆ƒ;ÿÅSb?ô>+¿b¸Š}|žŸð”Øÿìõ¯iâøb¼×S\0i6¶ëiÅÜñÄ“f“Þ~c¸¶0>V^á€êr+Ê>+ÿÈëà¾ȾÿÐ#®›Qñ5æ«yccàËíòI¢¹šK©¥3BžPŒyxˆýâÓG“Ÿ•rprq9Ô¿¶u?‡·—ä‹ÛK«Ÿ+vvo†&Æp3ŒõÀÍsâÿÝæüŸärcÿÝj…þAžhæŠ+óþgÜü»™†kCáGü޾7úXèV}h|(ÿ‘ׯßKýJöò|D½?T} 6ñR¿òþ¨õžÔÅd{uÍ}i÷'ðLšпíãÿGÉ]þ+€ø%ÿ$‡Bÿ·ý%z&+Ïþ)Œž¿ñTØýŠôóÿŠó%ÿØÕcÿ³Ð ”QEyññ˜ÿ©[?ù5^\ÿ“…ÿ¹WÿnëÐJLRsÇ_Zu&yÅp O?ìj¾ÿÙ+Ð+ÏþÿÌéÿcU÷þÉ^@ ÜWÉñÄZO‡­,[O–Sod±?Ö¾·î+á‹OøõéýkÓÊòL6qYáñ7åJúwÛõ8qøjXŠjUÕÏFÿ…“ýçÿ¿«Gü,˜?è?ýýZóú+Þÿˆq”w—ßÿò?±ð_Éø¿ó:Ïøâ[Ã×VKa,M.Ü3H`{}+Ýu¿ˆ׆ôyµ]WÁÐXÛíó%û}»íÜÁG Äœ–¥|±wÿ¯øWÔÿ?äëŸöÃÿJ#¯4ÉpÙ=UC {5}~ïÐö0zxzNÕ•Ãâ—Ogþ†«ýž¹Ÿúšèþ!ð…óÄeXÅèØ ÊÄ?­tÿÿæJÿ±®Çÿg®öŒÿ¯ ÿÛßò†¸ðøhbªÆ…M¤Ò~C¦´éÊ2Ù£•?`Ïü‚çÿ¿«Gü,˜?è?ýýZóú+êÿâeå÷ÿÀ>{û#üŸ‹ÿ3Ð?ádÁÿ@¹ÿïê×wð?S]cÄ>1¾Xš! 
±Â1ÉYGô¯¯jýœÿãëÅ_öçÿµ«ÍÍ8O“ÑUðÍÝ»júoúØ QÊ”líæ{ÇjNâ—µ'q^ëž9ðŸ]ñŸÃ="ÞÇÁ×z…²yÛ.£¾·_398WpÑÈí]§ü$þ+ÿ¢}}ÿƒ+Oþ.³þ É!пíãÿGÉ^@ü$þ+ÿ¢}}ÿƒ+Oþ.¸¿‹ïˆo>jð_x>ïO·'}Ëß[È©ûèÈÊ£’rp8ëÙ+Ïþ6ÿÉ"×?í‡þ”G@þ!ÇŽÿcŸÿ£–»Ò¸ÿˆ_ñå ØÁ§ÿèå®Àt 3ø…uwgñ3ÀØéòj ý£²Ú9V6Ü ?3£'“Ú·?á'ñ_ýëïüZñuŸâïù+¿ÿî'ÿ¤ë^@ü$þ+ÿ¢}}ÿƒ+Oþ.øIüWÿDúûÿVŸü]vP™ü>º»¼ø™ñâûO“O¹ìíö²J’2b,„©Èð{צv¯?ðŸü•÷ ÿÑ ^Ú€¸¯øO®ø‚Ïáž‘ocàë½BÙ<í—Qß[Ư™œœ+¸a‚Häv¯cî+€ø%ÿ$‡Bÿ·ý%hÂOâ¿ú'×ßø2´ÿâèÿ„ŸÅôO¯¿ðeiÿÅ×aEyŽ5oP½ðlZ—…ît¨‰¬™f–î C6XmÂ1= 9Æ8­ÿ›O )»hõkç×ukxc³´¾6»n >ZáУa–53ÈšOŠó%Ø×cÿ³ÑñOþd¯ûìözà~#k>"ð׆íï,ôM_C¹iÞÔ_ÝjËzæ9S/“#²’aF 1´¡ÁŽy½Çs\ÜxBâOÝèú~›m*Y,’»,ñ4hªQ™W ^rs‘Í}>TœŸ_ZðŸÚ+"ë¼õû_hkl>ª±¡=¤Ò= ëANœ£-š9sñ&Ü•?ýüZOøY0Ð.ûúµçôWÕÿÄ8Ê;Ëïÿ€|÷öFù?þg ÂɃþsÿßÕ®“Έgß¼Eej‚{{;yãŠv;sSHë÷z~kÆëÐþ\ë"ßÅVg…·í¯b‚´þÐK_- Ê1–äî ÃŒq^niÂx žŠ¯†níÛWÓÐîÀ`pøzŽT£go3èñ>ž³ÉÈ»¶xUŒÒÏcp–ñí±3¼j›88l€{u¬/xí¼+ÿoÊî>)ÿÌ•ÿc]þÏ\gí o%Ö¡á8b*¾Ù‚Ý:E^Fjªr}$¿1ò¹û±Ýž#ŠJÓ>¿ÿž¶ß›…ðßÿÏ[oͿ¿PþÕ‡òKî'û+üŒÌ¯jýœÿãëÅ_öçÿµ«Ê?á¿ÿž¶ß›…zçì÷o%¦¡âØe*]~Ç’:Jkçx›øXÅE¯{ªòcX*ô=ꑲ=ϵ'qKÚ“¸¯†À|ÿ’C¡ÛÇþ’½¼?áwü,øW:Oöü"¿ÙŸ¾ò~ßö;ýsîݳåûÛ±ŽØ®Ãþ.ÿýHÿù7@^ñ·þI¹ÿl?ô¢:?âïÿÔÿ“uÇüQÿ…ÿ çUþßÿ„Wû3÷^wØ>Ñç®M»wñ÷¶ç=³@ÿÄ/øòÐ?ì`Óÿôr×b:Wñ þ É!пíãÿGÉ]ÿq^!ð»þ?ü+'ûþ_ìÏßy?oûGþ¹÷nÙòýíØÇlP¸Q^ÿþ¤ü›£þ.ÿýHÿù7@ ñPÿ^èj±ÿÙé~)ÿÌ•ÿcUþÏ\¿Œ?á;þÑðü%ðŽý‹þK/,é~™æe±Ÿ3¸ÝžùÅtÿ¿æJÿ±®Çÿg A¯ý£?ãë¿ö÷ü¡¯x¯ ý¡-äºÔ<) EC·Û0[§H«¯5 U9>’_˜r¹û±Ýž#ŠJÓ>¿ÿž¶ß›…ðßÿÏ[oͿ¿PþÕ‡òKî'û+üŒÌ¯jýœÿãëÅ_öçÿµ«Ê?á¿ÿž¶ß›…zçì÷m%¦¡âÈe*\}%:t”×Îñ6:5𱊋^÷U䯰Uè{Õ#d{‰ÿ­M µ¸üiù õ¯†ðůü{'ãüêjí|%ð‹ÄÞ(ðÅž³§Ýé)ks¿bÜK qµÙN@B:©ï[_ð ¼eÿ?úýÿ›ÿ×è8#ÀÐÜ›ºI=:Øä•7ty…kxOxhÿÔZ×ÿF­w?ð ¼eÿ?úýÿ›ÿÕsðËÄ ñ/…uVëM– uÛKu[Y$f _wñ ŸΣ2â' :Pníd¥sé¡E àsÅÅ|:Ž‘ÿ OýJ¿ûu^šñ?ˆž"Õ¼3ñ† í;'ô…¾Ø®P)¸sÆÒrgÿÂÜñÐÿ—_þ1OÿÇ+®Ž ½hóS‹hÞžµEÍÜ÷ÜÓKÞ¼þç×ß÷æþ9Gü-Ïäæ×üÿÓ)ñÿ£+_ìÌZÞ Óê8Ÿäg{ð³øM?ìj¾ÿÙ+Ð+Ì> ]O} x†öìF..5û‰¥{$díÏ8É5éõÂÓNÌåi§f'q_!øwþ@6¿FÿÐ}yÞ¾Cðïü€-~ÿ¡÷¸wýéú~¨õòO÷‡éú£S4 àƒIŠ1_it}MÌÏÈëè?ô!^÷ñ³þI¹ÿl?ô¢:ðOýƒr=Bþ?0¯{øÖsð‡\ÿ¶úQ|W½/OÕŸ-ÿ¼/OÕ‹ñOþd¯ûìözå¾9øŸxCé{ÿ Å]OÅ?ù’¿ìk±ÿÙë–øäâ}áz Üÿß1W—‚ÿy§ê¿3Ï«ó8 õô™¥#šJý&èûÍzÀÏùxÃécÿ Ë^ŠôŸòñ‡ÒËÿA–¼"kêÑõýãgÀ¯èÏiíIÜRŽ”ëâÏ—8‚_òHt/ûxÿÑòW WŸüÿ’C¡ÛÇþ’½€ óÿ¿òHµÏûaÿ¥× WŸümçá¸?ë‡þ”G@¿ãË@ÿ±ƒOÿÑË]€é\wÄ#›ú4ÿýµØŽ”çþ.ÿ’»ðïþâúNµèçÞ.#þ÷þzié:× 
ÐEPŸøOþJ÷Ä_û†膯@í^á?ù+ßîÿ¢½t î+€ø%ÿ$‡Bÿ·ý%wýë€ø%ÿ$‡Bÿ·ý%zQ@ñOþd¯ûìöz>)ÿÌ•ÿc]þÏGÅ2?â‹çþf«ýžŠó%ØÕcÿ³Ð W‹|sñ>ð‡Ò÷ÿAн¦¼[ã™ÿ‰÷„yè/sÿ|Å]X/÷š~«ó:0ŸÇ‡ªüÎ=G½&iHæ’¿Iº>ä3]G¤ñëÞ'ÿ„~}*, O?íðÉ&~Y6íØËâÎ}«—ÅzÀÏùxÃéeÿ Ë^5õhúþŒñ³·ûˆúþŒôë¶­±²,Ö¨½žë(Ý‚Çg ÌsóŸN•ÓãÈxþ?¼aÿc¿ú&ì;×ÅŸ.pÿäè_öñÿ£ä®ÿÀ|ÿ’C¡ÛÇþ’½€À|Sÿ™/þÆ«ýž½¼ÿâ™ñEóÿ3UþÏ@?ñ«áëMû,ÍÖ¥«A§vŒÈ¢@ß1 Àõõ®{Æ:?ÅMSB6Ú~©£E)•78Km1Ló‰ÛN0HR2s´èüSÿ™+þÆ«ýž»úùwŶ^0²ñÄ1x£S²¾Ô[NVY¡L*Âd`„O›pcÓ¿Z‡9æºß‹„ÂѶäq£Gÿ£ä®Köù–÷ê}^Mþï{…¨Å(è}ø¯nRI³}ÏUøsámgþÃS踫ԗ¥yoÀ¢?áÖ¿ì57þ‹Š½Ht¯Ì«ÿ^§ÁUþ$½D úוÃððD±Ç­ø…z*\Æú.½ZŠˆÎQw‹± N:¦yoü(þƒÞ#ÿÀ¨ÿøÝ'ü(þƒÞ#ÿÀ¨ÿøÝz~Þ¯ó?¼¿k>ìò‰¾xzhÌrk^ ‘ª½ÌDü‡Z¿óÿ ƒ\Ï_ÜéDuèUçßOüZ=pwýÇþ”GQ)Ênòw!¶ÝØ¿úø+þÆ«ýž´|iðÿMñÄ–ßÝßÛµ™äµœŠ„ïÛœ–VþèéŽõ{ž³ñŽŸkiwy}h-n’î)¬eȲ(`¤1xœŽxÖü*Ï_xãðÕÿû IÙÝ i©™ÿ 'Aëý½â?ü ÿÑÿ 'Aÿ ÷ˆÿð*?þ7Zð«?ê|ñÏþ ÿû ?áVÔùãŸüÿö§·«üÏï/ÚÏ»3?áDè?ôñþGÿÆë¥ðgÃí7Àòêi÷—÷/åù­y"¹7cU¾*Íÿ…YÿSçŽðoÿØQÿ ³þ§Ïÿàßÿ°©•IÉZNâs”´lïÅ!Ö¸øUŸõ>xçÿÿý…ð«?ê|ñÏþ ÿû ‚Jö?m4Ë8ìì|eãKhó²}LGä’p¡02I?Xÿ…YÿSçŽðqÿØQÿ ³þ§Ïÿàßÿ°£þgýOž9ÿÁ¿ÿa@ü*ÏúŸµÃ^ü0·ºÕ¯µ øŸMkÙÌòç߈c.@ÀSؤÔ_ð«?ê|ñÀÿ¸ÇÿaPI^Çàí¦™gŒ¼cimvCo©ˆã\’N&I'ñ«ð«?ê|ñÏþ?û ?áVÔùãŸüö¬ÿ©óÇ?ø8ÿì(ÿ…YÿSçŽðqÿØQ‹Oí »¿x«Pû ÔwpÃ}¨‰cóä¥?0pO4¬ÿ©óÇ?ø8ÿì(ÿ…YÿSçŽðqÿØP|Ràx/ÛÅV'ÿC®ÞÞúÚê{¸a—|–’ˆg\±Ê+ãž¿+¡ÈãŸ\׋Oí>îïÅ>)Ô~ÃwäPßjhüÄ9RAOÃŽÄúÓχµ›MGÄ¡ÙǧÍ{©Er·i,‹ˆ¾Î¡†L3‰C± ‹’ù rAÄñöàcŦçYñ§c}im ´ÆÈ&‘ØÇçH#eqc÷ÙxÁéÍJ>è$È{Ä_…Ôün§Õ´_j<º5Æv—R°œj–7öÍÌ¥Y6Þ#Eø€òÃ*Æû•@ÁÆ¥¨ÀÅ\jÎ*Ñl¨ÎQVLòïøQ:ý¼GÿQÿñº?áEhC®ø‹ÿ£ÿãuêTU{z¿ÌÊö³îÎkÁþ ²ðV—q§é÷“Å=Á¹v»uwÞUTòxÂZéÀ¥¢²3 (¢€ (¢€ ÎÖôKé3éZ­¿Ú,gÛæE½“v0åH#:Ñ¢€<ü|ø{Ž|=Ïý~ÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñtÂ’ø{ÿB÷þNÜñuèPŸÿÂ’ø{ÿB÷þNÜñu=Âéšµýž‡åÜ[J³Dÿkœíu9S‚ø<ŽõÜÑ@:RÑEQEQEQEQEQEQEQEQEQEÿÙhpcc-1.4.1/hpl/www/tuning.html0000644000000000000000000004417311256503657013145 00000000000000 HPL Tuning

HPL Tuning

After having built the executable hpl/bin/<arch>/xhpl, one may want to modify the input data file HPL.dat. This file should reside in the same directory as the executable hpl/bin/<arch>/xhpl. An example HPL.dat file is provided by default. This file contains information about the problem sizes, machine configuration, and algorithm features to be used by the executable. It is 31 lines long. All the selected parameters will be printed in the output generated by the executable.

We first describe the meaning of each line of this input file below. Finally, a few useful experimental guidelines to set up the file are given at the end of this page.


Description of the HPL.dat File

Line 1: (unused) Typically one would use this line for one's own purposes. For example, it could be used to summarize the content of the input file. By default this line reads:
HPL Linpack benchmark input file

Line 2: (unused) same as line 1. By default this line reads:
Innovative Computing Laboratory, University of Tennessee

Line 3: the user can choose where the output should be redirected to. In the case of a file, a name is necessary, and this is the line where one wants to specify it. Only the first name on this line is significant. By default, the line reads:
HPL.out  output file name (if any)
This means that if one chooses to redirect the output to a file, the file will be called "HPL.out". The rest of the line is unused, and this space can be used to put some informative comment on the meaning of this line.


Line 4: This line specifies where the output should go. The line is formatted: it must begin with a positive integer; the rest is insignificant. 3 choices are possible for the positive integer: 6 means that the output will go to the standard output, 7 means that the output will go to the standard error. Any other integer means that the output should be redirected to a file, whose name has been specified in the line above. This line by default reads:
6        device out (6=stdout,7=stderr,file)
which means that the output generated by the executable should be redirected to the standard output.


Line 5: This line specifies the number of problem sizes to be executed. This number should be less than or equal to 20. The first integer is significant, the rest is ignored. If the line reads:
3        # of problems sizes (N)
this means that the user is willing to run 3 problem sizes that will be specified in the next line.


Line 6: This line specifies the problem sizes one wants to run. Assuming the line above started with 3, the 3 first positive integers are significant, the rest is ignored. For example:
3000 6000 10000    Ns
means that one wants xhpl to run 3 (specified in line 5) problem sizes, namely 3000, 6000 and 10000.


Line 7: This line specifies the number of block sizes to be run. This number should be less than or equal to 20. The first integer is significant, the rest is ignored. If the line reads:
5        # of NBs
this means that the user is willing to use 5 block sizes that will be specified in the next line.


Line 8: This line specifies the block sizes one wants to run. Assuming the line above started with 5, the 5 first positive integers are significant, the rest is ignored. For example:
80 100 120 140 160 NBs
means that one wants xhpl to use 5 (specified in line 7) block sizes, namely 80, 100, 120, 140 and 160.


Line 9: This line specifies how the MPI processes should be mapped onto the nodes of your platform. There are currently two possible mappings, namely row- and column-major. This feature is mainly useful when these nodes are themselves multi-processor computers. A row-major mapping is recommended.


Line 10: This line specifies the number of process grids to be run. This number should be less than or equal to 20. The first integer is significant, the rest is ignored. If the line reads:
2        # of process grids (P x Q)
this means that you are willing to try 2 process grid sizes that will be specified in the next line.


Line 11-12: These two lines specify the number of process rows and columns of each grid you want to run on. Assuming the line above (10) started with 2, the 2 first positive integers of those two lines are significant, the rest is ignored. For example:
1 2          Ps
6 8          Qs
means that one wants to run xhpl on 2 process grids (line 10), namely 1-by-6 and 2-by-8. Note: In this example, it is required then to start xhpl on at least 16 nodes (max of Pi-by-Qi). The runs on the two grids will be consecutive. If one was starting xhpl on more than 16 nodes, say 52, only 6 would be used for the first grid (1x6) and then 16 (2x8) would be used for the second grid. The fact that you started the MPI job on 52 nodes, will not make HPL use all of them. In this example, only 16 would be used. If one wants to run xhpl with 52 processes one needs to specify a grid of 52 processes, for example the following lines would do the job:
4  2         Ps
13 8         Qs

Line 13: This line specifies the threshold against which the residuals should be compared. The residuals should be of order 1, but are in practice slightly less than this, typically 0.001. This line is made of a real number, the rest is not significant. For example:
16.0         threshold
In practice, a value of 16.0 will cover most cases. For various reasons, it is possible that some of the residuals become slightly larger, say for example 35.6. xhpl will flag those runs as failed, however they can be considered as correct. A run should be considered as failed if the residual is a few orders of magnitude bigger than 1, for example 10^6 or more. Note: if one was to specify a threshold of 0.0, all tests would be flagged as failed, even though the answer is likely to be correct. It is allowed to specify a negative value for this threshold, in which case the checks will be by-passed, no matter what the threshold value is, as soon as it is negative. This feature allows one to save time when performing a lot of experiments, say for instance during the tuning phase. Example:
-16.0        threshold

The remaining lines allow one to specify algorithmic features. xhpl will run all possible combinations of those for each problem size, block size, process grid combination. This is handy when one looks for an "optimal" set of parameters. To understand a little bit better, let us first say a few words about the algorithm implemented in HPL. Basically this is a right-looking version with row-partial pivoting. The panel factorization is matrix-matrix operation based and recursive, dividing the panel into NDIV subpanels at each step. This part of the panel factorization is denoted below by "recursive panel fact. (RFACT)". The recursion stops when the current panel is made of less than or equal to NBMIN columns. At that point, xhpl uses a matrix-vector operation based factorization denoted below by "PFACTs". Classic recursion would then use NDIV=2, NBMIN=1. There are essentially 3 numerically equivalent LU factorization algorithm variants (left-looking, Crout and right-looking). In HPL, one can choose any one of those for the RFACT, as well as the PFACT. The following lines of HPL.dat allow you to set those parameters.

Lines 14-21: (Example 1)
3       # of panel fact
0 1 2   PFACTs (0=left, 1=Crout, 2=Right)
4       # of recursive stopping criterium
1 2 4 8 NBMINs (>= 1)
3       # of panels in recursion
2 3 4   NDIVs
3       # of recursive panel fact.
0 1 2   RFACTs (0=left, 1=Crout, 2=Right)
This example would try all variants of PFACT, 4 values for NBMIN, namely 1, 2, 4 and 8, 3 values for NDIV namely 2, 3 and 4, and all variants for RFACT.

Lines 14-21: (Example 2)
2       # of panel fact
2 0     PFACTs (0=left, 1=Crout, 2=Right)
2       # of recursive stopping criterium
4 8     NBMINs (>= 1)
1       # of panels in recursion
2       NDIVs
1       # of recursive panel fact.
2       RFACTs (0=left, 1=Crout, 2=Right)
This example would try 2 variants of PFACT namely right looking and left looking, 2 values for NBMIN, namely 4 and 8, 1 value for NDIV namely 2, and one variant for RFACT.


In the main loop of the algorithm, the current panel of columns is broadcast in process rows using a virtual ring topology. HPL offers various choices and one most likely wants to use the increasing ring modified encoded as 1. 3 and 4 are also good choices.

Lines 22-23: (Example 1)
1       # of broadcast
1       BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
This will cause HPL to broadcast the current panel using the increasing ring modified topology.

Lines 22-23: (Example 2)
2       # of broadcast
0 4     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
This will cause HPL to broadcast the current panel using the increasing ring virtual topology and the long message algorithm.


Lines 24-25 allow one to specify the look-ahead depth used by HPL. A depth of 0 means that the next panel is factorized after the update by the current panel is completely finished. A depth of 1 means that the next panel is immediately factorized after being updated. The update by the current panel is then finished. A depth of k means that the k next panels are factorized immediately after being updated. The update by the current panel is then finished. It turns out that a depth of 1 seems to give the best results, but may need a large problem size before one can see the performance gain. So use 1, if you do not know better, otherwise you may want to try 0. Look-ahead of depths 3 and larger will probably not give you better results.

Lines 24-25: (Example 1):
1       # of lookahead depth
1       DEPTHs (>=0)
This will cause HPL to use a look-ahead of depth 1.

Lines 24-25: (Example 2):
2       # of lookahead depth
0 1     DEPTHs (>=0)
This will cause HPL to use a look-ahead of depths 0 and 1.


Lines 26-27 allow one to specify the swapping algorithm used by HPL for all tests. There are currently two swapping algorithms available, one based on "binary exchange" and the other one based on a "spread-roll" procedure (also called "long" below). For large problem sizes, this last one is likely to be more efficient. The user can also choose to mix both variants, that is "binary-exchange" for a number of columns less than a threshold value, and then the "spread-roll" algorithm. This threshold value is then specified on Line 27.

Lines 26-27: (Example 1):
1       SWAP (0=bin-exch,1=long,2=mix)
60      swapping threshold
This will cause HPL to use the "long" or "spread-roll" swapping algorithm. Note that a threshold is specified in that example but not used by HPL.

Lines 26-27: (Example 2):
2       SWAP (0=bin-exch,1=long,2=mix)
60      swapping threshold
This will cause HPL to use the "long" or "spread-roll" swapping algorithm as soon as there are more than 60 columns in the row panel. Otherwise, the "binary-exchange" algorithm will be used instead.


Line 28 allows one to specify whether the upper triangle of the panel of columns should be stored in no-transposed or transposed form. Example:
0            L1 in (0=transposed,1=no-transposed) form

Line 29 allows one to specify whether the panel of rows U should be stored in no-transposed or transposed form. Example:
0            U  in (0=transposed,1=no-transposed) form

Line 30 enables / disables the equilibration phase. This option will not be used unless you selected 1 or 2 in Line 26. Example:
1            Equilibration (0=no,1=yes)

Line 31 allows one to specify the alignment in memory for the memory space allocated by HPL. On modern machines, one probably wants to use 4, 8 or 16. This may result in a tiny amount of memory wasted. Example:
8       memory alignment in double (> 0)

Guide Lines

  1. Figure out a good block size for the matrix multiply routine. The best method is to try a few out. If you happen to know the block size used by the matrix-matrix multiply routine, a small multiple of that block size will do fine. This particular topic is discussed in the FAQs section.

  2. The process mapping should not matter if the nodes of your platform are single processor computers. If these nodes are multi-processors, a row-major mapping is recommended.

  3. HPL likes "square" or slightly flat process grids. Unless you are using a very small process grid, stay away from the 1-by-Q and P-by-1 process grids. This particular topic is also discussed in the FAQs section.

  4. Panel factorization parameters: a good start is the following for lines 14-21:
    1       # of panel fact
    1       PFACTs (0=left, 1=Crout, 2=Right)
    2       # of recursive stopping criterium
    4 8     NBMINs (>= 1)
    1       # of panels in recursion
    2       NDIVs
    1       # of recursive panel fact.
    2       RFACTs (0=left, 1=Crout, 2=Right)
    
  5. Broadcast parameters: at this time it is far from obvious to me what the best setting is, so I would probably try them all. If I had to guess I would probably start with the following for the lines 22-23:
    2       # of broadcast
    1 3     BCASTs (0=1rg,1=1rM,2=2rg,3=2rM,4=Lng,5=LnM)
    
    The best broadcast depends on your problem size and hardware performance. My take is that 4 or 5 may be competitive for machines featuring very fast nodes compared to the network.

  6. Look-ahead depth: as mentioned above 0 or 1 are likely to be the best choices. This also depends on the problem size and machine configuration, so I would try "no look-ahead (0)" and "look-ahead of depth 1 (1)". That is for lines 24-25:
    2       # of lookahead depth
    0 1     DEPTHs (>=0)
    
  7. Swapping: one can select only one of the three algorithms in the input file. Theoretically, mix (2) should win, however long (1) might just be good enough. The difference should be small between those two assuming a swapping threshold of the order of the block size (NB) selected. If this threshold is very large, HPL will use bin_exch (0) most of the time and if it is very small (< NB) long (1) will always be used. In short and assuming the block size (NB) used is say 60, I would choose for the lines 26-27:
    2       SWAP (0=bin-exch,1=long,2=mix)
    60      swapping threshold 
    
    I would also try the long variant. For a very small number of processes in every column of the process grid (say < 4), very little performance difference should be observable.

  8. Local storage: I do not think Line 28 matters. Pick 0 if in doubt. Line 29 is more important. It controls how the panel of rows should be stored. No doubt 0 is better. The caveat is that in that case the matrix-multiply function is called with ( Notrans, Trans, ... ), that is C := C - A B^T. Unless the computational kernel you are using has a very poor (with respect to performance) implementation of that case, and is much more efficient with ( Notrans, Notrans, ... ) just pick 0 as well. So, my choice:
    0       L1 in (0=transposed,1=no-transposed) form
    0       U  in (0=transposed,1=no-transposed) form
    
  9. Equilibration: It is hard to tell whether equilibration should always be performed or not. Not knowing much about the random matrix generated and because the overhead is so small compared to the possible gain, I turn it on all the time.
    1       Equilibration (0=no,1=yes)
    
  10. For alignment, 4 should be plenty, but just to be safe, one may want to pick 8 instead.
    8       memory alignment in double (> 0)
    

[Home] [Copyright and Licensing Terms] [Algorithm] [Scalability] [Performance Results] [Documentation] [Software] [FAQs] [Tuning] [Errata-Bugs] [References] [Related Links]

hpcc-1.4.1/include/hpcc.h0000644000000000000000000002110011353467335012036 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ #ifndef HPCC_H #define HPCC_H 1 /* HPL includes: stdio.h stdlib.h string.h stdarg.h vararg.h (if necessary) mpi.h */ #include #include #include #include #define MPIFFT_TIMING_COUNT 8 /* Define 64-bit types and corresponding format strings for printf() and constants */ #ifdef LONG_IS_64BITS typedef unsigned long u64Int; typedef long s64Int; #define FSTR64 "%ld" #define FSTRU64 "%lu" #define ZERO64B 0L #else typedef unsigned long long u64Int; typedef long long s64Int; #define FSTR64 "%lld" #define FSTRU64 "%llu" #define ZERO64B 0LL #endif typedef struct { double GBs, time, residual; int n, nb, nprow, npcol; } PTRANS_RuntimeData; /* parameters of execution */ typedef struct { /* HPL section */ HPL_T_test test; int nval [HPL_MAX_PARAM], nbval [HPL_MAX_PARAM], pval [HPL_MAX_PARAM], qval [HPL_MAX_PARAM], nbmval[HPL_MAX_PARAM], ndvval[HPL_MAX_PARAM], ndhval[HPL_MAX_PARAM]; HPL_T_ORDER porder; HPL_T_FACT pfaval[HPL_MAX_PARAM], rfaval[HPL_MAX_PARAM]; HPL_T_TOP topval[HPL_MAX_PARAM]; HPL_T_FACT rpfa; HPL_T_SWAP fswap; int ns, nbs, npqs, npfs, nbms, ndvs, nrfs, ntps, ndhs, tswap, L1notran, Unotran, equil, align; /* HPCC section */ char inFname[256 + 1], outFname[256 + 1]; int PTRANSns, PTRANSnval[2 * HPL_MAX_PARAM]; int PTRANSnbs, PTRANSnbval[2 * HPL_MAX_PARAM]; int PTRANSnpqs, PTRANSpval[2 * HPL_MAX_PARAM], PTRANSqval[2 * HPL_MAX_PARAM]; double MPIRandomAccess_LCG_GUPs, MPIRandomAccess_GUPs, Star_LCG_GUPs, Single_LCG_GUPs, StarGUPs, SingleGUPs, MPIRandomAccess_ErrorsFraction, MPIRandomAccess_time, MPIRandomAccess_CheckTime, MPIRandomAccess_TimeBound, MPIRandomAccess_LCG_ErrorsFraction, MPIRandomAccess_LCG_time, MPIRandomAccess_LCG_CheckTime, MPIRandomAccess_LCG_TimeBound, StarStreamCopyGBs, StarStreamScaleGBs, StarStreamAddGBs, StarStreamTriadGBs, SingleStreamCopyGBs, 
SingleStreamScaleGBs, SingleStreamAddGBs, SingleStreamTriadGBs, StarDGEMMGflops, SingleDGEMMGflops; double StarFFTGflops, SingleFFTGflops, MPIFFTGflops, MPIFFT_maxErr; double MaxPingPongLatency, RandomlyOrderedRingLatency, MinPingPongBandwidth, NaturallyOrderedRingBandwidth, RandomlyOrderedRingBandwidth, MinPingPongLatency, AvgPingPongLatency, MaxPingPongBandwidth, AvgPingPongBandwidth, NaturallyOrderedRingLatency; int DGEMM_N; int StreamThreads, StreamVectorSize; int FFT_N; int MPIFFT_Procs; int MPIRandomAccess_LCG_Algorithm, MPIRandomAccess_Algorithm; HPL_RuntimeData HPLrdata; PTRANS_RuntimeData PTRANSrdata; int Failure; /* over all failure of the benchmark */ double MPIFFTtimingsForward[MPIFFT_TIMING_COUNT], MPIFFTtimingsBackward[MPIFFT_TIMING_COUNT]; size_t HPLMaxProcMem; int HPLMaxProc, HPLMinProc; int RunHPL, RunStarDGEMM, RunSingleDGEMM, RunPTRANS, RunStarStream, RunSingleStream, RunMPIRandomAccess_LCG, RunStarRandomAccess_LCG, RunSingleRandomAccess_LCG, RunMPIRandomAccess, RunStarRandomAccess, RunSingleRandomAccess, RunStarFFT, RunSingleFFT, RunMPIFFT, RunLatencyBandwidth; int FFTEnblk, FFTEnp, FFTEl2size; s64Int RandomAccess_LCG_N, RandomAccess_N, MPIRandomAccess_LCG_ExeUpdates, MPIRandomAccess_ExeUpdates, MPIRandomAccess_LCG_N, MPIRandomAccess_N, MPIRandomAccess_LCG_Errors, MPIRandomAccess_Errors, MPIFFT_N; } HPCC_Params; /* This is what needs to be done to add a new benchmark: - Add the benchmark code to the directory structure (headers, makefiles) - Add benchmark output data to the HPCC_Params structure. - Initialize the HPCC_Params structure data in HPCC_Init(). - Add a call to the benchmark function in main(). - Make sure that all the processes fill out the structure with the same data. - Print the output of the benchmark in HPCC_Finalize(). 
- For tests that have "Star" and "Single" variants (DGEMM, RandomAccess, STREAM) the function that performs the test returns a value (0 or 1) that indicates runtime failure and also returns benchamark failure (due to wrong optimization that causes numerical error) by setting params->Failure. */ int HPCC_external_init(int argc, char *argv[], void *extdata); int HPCC_external_finalize(int argc, char *argv[], void *extdata); extern int HPCC_Init(HPCC_Params *params); extern int HPCC_Finalize(HPCC_Params *params); extern int HPCC_LocalVectorSize(HPCC_Params *params, int vecCnt, size_t size, int pow2); extern int HPCC_Defaults(HPL_T_test *TEST, int *NS, int *N, int *NBS, int *NB, HPL_T_ORDER *PMAPPIN, int *NPQS, int *P, int *Q, int *NPFS, HPL_T_FACT *PF, int *NBMS, int *NBM, int *NDVS, int *NDV, int *NRFS, HPL_T_FACT *RF, int *NTPS, HPL_T_TOP *TP, int *NDHS, int *DH, HPL_T_SWAP *FSWAP, int *TSWAP, int *L1NOTRAN, int *UNOTRAN, int *EQUIL, int *ALIGN, MPI_Comm comm); extern int HPL_main(int ARGC, char **ARGV, HPL_RuntimeData *rdata, int *failure); extern float HPL_slamch (const HPL_T_MACH); extern double HPCC_dweps(); extern float HPCC_sweps(); extern int HPCC_StarDGEMM(HPCC_Params *params); extern int HPCC_SingleDGEMM(HPCC_Params *params); extern int PTRANS(HPCC_Params *params); extern int HPCC_MPIRandomAccess_LCG(HPCC_Params *params); extern int HPCC_SingleRandomAccess_LCG(HPCC_Params *params); extern int HPCC_StarRandomAccess_LCG(HPCC_Params *params); extern int HPCC_MPIRandomAccess(HPCC_Params *params); extern int HPCC_SingleRandomAccess(HPCC_Params *params); extern int HPCC_StarRandomAccess(HPCC_Params *params); extern int HPCC_SingleStream(HPCC_Params *params); extern int HPCC_StarStream(HPCC_Params *params); extern int HPCC_StarFFT(HPCC_Params *params); extern int HPCC_SingleFFT(HPCC_Params *params); extern int HPCC_MPIFFT(HPCC_Params *params); extern int HPCC_TestFFT(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure); extern int 
HPCC_TestDGEMM(HPCC_Params *params, int doIO, double *UGflops, int *Un, int *Ufailure); extern int MaxMem(int nprocs, int imrow, int imcol, int nmat, int *mval, int *nval, int nbmat, int *mbval, int *nbval, int ngrids, int *npval, int *nqval, long *maxMem); extern int HPCC_Stream(HPCC_Params *params, int doIO, double *copyGBs, double *scaleGBs, double *addGBs, double *triadGBs, int *failure); extern void main_bench_lat_bw(HPCC_Params *params); extern int pdtrans(char *trans, int *m, int *n, int * mb, int *nb, double *a, int *lda, double *beta, double *c__, int *ldc, int *imrow, int *imcol, double *work, int *iwork); extern FILE* pdtransinfo(int *nmat, int *mval, int *nval, int *ldval, int *nbmat, int *mbval, int *nbval, int *ldnbval, int *ngrids, int *npval, int *nqval, int *ldpqval, int *iaseed, int *imrow, int *imcol, float *thresh, int *iam, int *nprocs, double *eps, char *infname, int *fcl, char *outfname); extern int pdmatgen(int *ictxt, char *aform, char *diag, int *m, int *n, int *mb, int *nb, double*a, int *lda, int *iarow, int *iacol, int *iseed, int *iroff, int *irnum, int *icoff, int *icnum, int * myrow, int *mycol, int *nprow, int *npcol, double alpha); extern int pdmatcmp(int *ictxt, int *m_, int *n_, double *a, int *lda_, double *aCopy, int *ldc_, double *error); extern int pxerbla(int *ictxt, char *srname, int *info); extern int slcombine_(int *ictxt, char *scope, char *op, char * timetype, int *n, int *ibeg, double *times); extern int icopy_(int *n, int *sx, int *incx, int * sy, int *incy); extern int numroc_(int *, int *, int *, int *, int *); extern int slboot_(void); extern int sltimer_(int *i__); extern int ilcm_(int *, int *); extern int iceil_(int *, int *); extern double pdrand(); extern int setran_(int *, int *, int *); extern int jumpit_(int *, int *, int *, int *); extern int xjumpm_(int *, int *, int *, int *, int *, int *, int *); /* ---------------------------------------------------------------------- */ #define DPRN(i,v) 
do{printf(__FILE__ "(%d)@%d:" #v "=%g\n",__LINE__,i,(double)(v));fflush(stdout);}while(0) #define BEGIN_IO(r,fn,f) if(0==r){f=fopen(fn,"a");if(!f)fprintf(f=stderr,"Problem with appending to file '%s'\n",fn) #define END_IO(r,f) fflush(f); if (f!=stdout && f!=stderr) fclose(f);} f=(FILE*)(NULL) #include #define XMALLOC(t,s) ((t*)malloc(sizeof(t)*(s))) #define XCALLOC(t,s) ((t*)calloc((s),sizeof(t))) #endif hpcc-1.4.1/include/hpccver.h0000644000000000000000000000047211403763471012560 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; -*- */ /* Version has four components: MAJOR, MINOR, MICRO and RELEASE. RELEASE is a, b, c, f (alpha, beta, candidate, and final). */ #define HPCC_VERSION_MAJOR 1 #define HPCC_VERSION_MINOR 4 #define HPCC_VERSION_MICRO 1 #define HPCC_VERSION_RELEASE 'f' hpcc-1.4.1/src/HPL_slamch.c0000644000000000000000000006630111256503660012240 00000000000000/* * -- High Performance Computing Linpack Benchmark (HPL) * HPL - 1.0a - January 20, 2004 * Antoine P. Petitet * University of Tennessee, Knoxville * Innovative Computing Laboratories * (C) Copyright 2000-2004 All Rights Reserved * * -- Copyright notice and Licensing terms: * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions, and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * 3. All advertising materials mentioning features or use of this * software must display the following acknowledgement: * This product includes software developed at the University of * Tennessee, Knoxville, Innovative Computing Laboratories. * * 4. 
The name of the University, the name of the Laboratory, or the * names of its contributors may not be used to endorse or promote * products derived from this software without specific written * permission. * * -- Disclaimer: * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE UNIVERSITY * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, * DATA OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * --------------------------------------------------------------------- */ /* * Include files */ #include #ifdef HPL_rone #undef HPL_rone #endif #define HPL_rone 1.0f #ifdef HPL_rtwo #undef HPL_rtwo #endif #define HPL_rtwo 2.0f #ifdef HPL_rzero #undef HPL_rzero #endif #define HPL_rzero 0.0f /* * --------------------------------------------------------------------- * Static function prototypes * --------------------------------------------------------------------- */ static void HPL_slamc1 STDC_ARGS( ( int *, int *, int *, int * ) ); static void HPL_slamc2 STDC_ARGS( ( int *, int *, int *, float *, int *, float *, int *, float * ) ); static float HPL_slamc3 STDC_ARGS( ( const float, const float ) ); static void HPL_slamc4 STDC_ARGS( ( int *, const float, const int ) ); static void HPL_slamc5 STDC_ARGS( ( const int, const int, const int, const int, int *, float * ) ); static float HPL_sipow STDC_ARGS( ( const float, const int ) ); #ifdef HPL_STDC_HEADERS float HPL_slamch ( const 
HPL_T_MACH CMACH ) #else float HPL_slamch ( CMACH ) const HPL_T_MACH CMACH; #endif { /* * Purpose * ======= * * HPL_slamch determines machine-specific arithmetic constants such as * the relative machine precision (eps), the safe minimum (sfmin) such * that 1 / sfmin does not overflow, the base of the machine (base), the * precision (prec), the number of (base) digits in the mantissa (t), * whether rounding occurs in addition (rnd=1.0 and 0.0 otherwise), the * minimum exponent before (gradual) underflow (emin), the underflow * threshold (rmin) base**(emin-1), the largest exponent before overflow * (emax), the overflow threshold (rmax) (base**emax)*(1-eps). * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamch.f (version 2.0 -- 1992), that was itself * based on the function ENVRON by Malcolm and incorporated suggestions * by Gentleman and Marovich. See * * Malcolm M. A., Algorithms to reveal properties of floating-point * arithmetic., Comms. of the ACM, 15, 949-951 (1972). * * Gentleman W. M. and Marovich S. B., More on algorithms that reveal * properties of floating point arithmetic units., Comms. of the ACM, * 17, 276-277 (1974). 
* * Arguments * ========= * * CMACH (local input) const HPL_T_MACH * Specifies the value to be returned by HPL_slamch * = HPL_MACH_EPS, HPL_slamch := eps (default) * = HPL_MACH_SFMIN, HPL_slamch := sfmin * = HPL_MACH_BASE, HPL_slamch := base * = HPL_MACH_PREC, HPL_slamch := eps*base * = HPL_MACH_MLEN, HPL_slamch := t * = HPL_MACH_RND, HPL_slamch := rnd * = HPL_MACH_EMIN, HPL_slamch := emin * = HPL_MACH_RMIN, HPL_slamch := rmin * = HPL_MACH_EMAX, HPL_slamch := emax * = HPL_MACH_RMAX, HPL_slamch := rmax * * where * * eps = relative machine precision, * sfmin = safe minimum, * base = base of the machine, * prec = eps*base, * t = number of digits in the mantissa, * rnd = 1.0 if rounding occurs in addition, * emin = minimum exponent before underflow, * rmin = underflow threshold, * emax = largest exponent before overflow, * rmax = overflow threshold. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ static float eps, sfmin, base, t, rnd, emin, rmin, emax, rmax, prec; float small; static int first=1; int beta=0, imax=0, imin=0, it=0, lrnd=0; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; HPL_slamc2( &beta, &it, &lrnd, &eps, &imin, &rmin, &imax, &rmax ); base = (float)(beta); t = (float)(it); if( lrnd != 0 ) { rnd = HPL_rone; eps = HPL_sipow( base, 1 - it ) / HPL_rtwo; } else { rnd = HPL_rzero; eps = HPL_sipow( base, 1 - it ); } prec = eps * base; emin = (float)(imin); emax = (float)(imax); sfmin = rmin; small = HPL_rone / rmax; /* * Use SMALL plus a bit, to avoid the possibility of rounding causing * overflow when computing 1/sfmin. 
*/ if( small >= sfmin ) sfmin = small * ( HPL_rone + eps ); } if( CMACH == HPL_MACH_EPS ) return( eps ); if( CMACH == HPL_MACH_SFMIN ) return( sfmin ); if( CMACH == HPL_MACH_BASE ) return( base ); if( CMACH == HPL_MACH_PREC ) return( prec ); if( CMACH == HPL_MACH_MLEN ) return( t ); if( CMACH == HPL_MACH_RND ) return( rnd ); if( CMACH == HPL_MACH_EMIN ) return( emin ); if( CMACH == HPL_MACH_RMIN ) return( rmin ); if( CMACH == HPL_MACH_EMAX ) return( emax ); if( CMACH == HPL_MACH_RMAX ) return( rmax ); return( eps ); /* * End of HPL_slamch */ } #ifdef HPL_STDC_HEADERS static void HPL_slamc1 ( int * BETA, int * T, int * RND, int * IEEE1 ) #else static void HPL_slamc1 ( BETA, T, RND, IEEE1 ) /* * .. Scalar Arguments .. */ int * BETA, * IEEE1, * RND, * T; #endif { /* * Purpose * ======= * * HPL_slamc1 determines the machine parameters given by BETA, T, RND, * and IEEE1. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamc1.f (version 2.0 -- 1992), that was itself * based on the function ENVRON by Malcolm and incorporated suggestions * by Gentleman and Marovich. See * * Malcolm M. A., Algorithms to reveal properties of floating-point * arithmetic., Comms. of the ACM, 15, 949-951 (1972). * * Gentleman W. M. and Marovich S. B., More on algorithms that reveal * properties of floating point arithmetic units., Comms. of the ACM, * 17, 276-277 (1974). * * Arguments * ========= * * BETA (local output) int * * The base of the machine. * * T (local output) int * * The number of ( BETA ) digits in the mantissa. * * RND (local output) int * * Specifies whether proper rounding (RND=1) or chopping (RND=0) * occurs in addition. This may not be a reliable guide to the * way in which the machine performs its arithmetic. * * IEEE1 (local output) int * * Specifies whether rounding appears to be done in the IEEE * `round to nearest' style (IEEE1=1), (IEEE1=0) otherwise. 
* * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ float a, b, c, f, one, qtr, savec, t1, t2; static int first=1, lbeta, lieee1, lrnd, lt; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; one = HPL_rone; /* * lbeta, lieee1, lt and lrnd are the local values of BETA, IEEE1, T and * RND. Throughout this routine we use the function HPL_slamc3 to ensure * that relevant values are stored and not held in registers, or are not * affected by optimizers. * * Compute a = 2.0**m with the smallest positive integer m such that * fl( a + 1.0 ) == a. */ a = HPL_rone; c = HPL_rone; do { a *= HPL_rtwo; c = HPL_slamc3( a, one ); c = HPL_slamc3( c, -a ); } while( c == HPL_rone ); /* * Now compute b = 2.0**m with the smallest positive integer m such that * fl( a + b ) > a. */ b = HPL_rone; c = HPL_slamc3( a, b ); while( c == a ) { b *= HPL_rtwo; c = HPL_slamc3( a, b ); } /* * Now compute the base. a and c are neighbouring floating point num- * bers in the interval ( BETA**T, BETA**( T + 1 ) ) and so their diffe- * rence is BETA. Adding 0.25 to c is to ensure that it is truncated to * BETA and not (BETA-1). */ qtr = one / 4.0; savec = c; c = HPL_slamc3( c, -a ); lbeta = (int)(c+qtr); /* * Now determine whether rounding or chopping occurs, by adding a bit * less than BETA/2 and a bit more than BETA/2 to a. */ b = (float)(lbeta); f = HPL_slamc3( b / HPL_rtwo, -b / 100.0 ); c = HPL_slamc3( f, a ); if( c == a ) { lrnd = 1; } else { lrnd = 0; } f = HPL_slamc3( b / HPL_rtwo, b / 100.0 ); c = HPL_slamc3( f, a ); if( ( lrnd != 0 ) && ( c == a ) ) lrnd = 0; /* * Try and decide whether rounding is done in the IEEE round to nea- * rest style. b/2 is half a unit in the last place of the two numbers * a and savec. Furthermore, a is even, i.e. has last bit zero, and sa- * vec is odd. Thus adding b/2 to a should not change a, but adding b/2 * to savec should change savec. 
*/ t1 = HPL_slamc3( b / HPL_rtwo, a ); t2 = HPL_slamc3( b / HPL_rtwo, savec ); if ( ( t1 == a ) && ( t2 > savec ) && ( lrnd != 0 ) ) lieee1 = 1; else lieee1 = 0; /* * Now find the mantissa, T. It should be the integer part of log to the * base BETA of a, however it is safer to determine T by powering. So we * find T as the smallest positive integer for which fl( beta**t + 1.0 ) * is equal to 1.0. */ lt = 0; a = HPL_rone; c = HPL_rone; do { lt++; a *= (float)(lbeta); c = HPL_slamc3( a, one ); c = HPL_slamc3( c, -a ); } while( c == HPL_rone ); } *BETA = lbeta; *T = lt; *RND = lrnd; *IEEE1 = lieee1; } #ifdef HPL_STDC_HEADERS static void HPL_slamc2 ( int * BETA, int * T, int * RND, float * EPS, int * EMIN, float * RMIN, int * EMAX, float * RMAX ) #else static void HPL_slamc2( BETA, T, RND, EPS, EMIN, RMIN, EMAX, RMAX ) /* * .. Scalar Arguments .. */ int * BETA, * EMAX, * EMIN, * RND, * T; float * EPS, * RMAX, * RMIN; #endif { /* * Purpose * ======= * * HPL_slamc2 determines the machine parameters specified in its argu- * ment list. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamc2.f (version 2.0 -- 1992), that was itself * based on a function PARANOIA by W. Kahan of the University of Cali- * fornia at Berkeley for the computation of the relative machine epsi- * lon eps. * * Arguments * ========= * * BETA (local output) int * * The base of the machine. * * T (local output) int * * The number of ( BETA ) digits in the mantissa. * * RND (local output) int * * Specifies whether proper rounding (RND=1) or chopping (RND=0) * occurs in addition. This may not be a reliable guide to the * way in which the machine performs its arithmetic. * * EPS (local output) float * * The smallest positive number such that fl( 1.0 - EPS ) < 1.0, * where fl denotes the computed value. * * EMIN (local output) int * * The minimum exponent before (gradual) underflow occurs. 
* * RMIN (local output) float * * The smallest normalized number for the machine, given by * BASE**( EMIN - 1 ), where BASE is the floating point value * of BETA. * * EMAX (local output) int * * The maximum exponent before overflow occurs. * * RMAX (local output) float * * The largest positive number for the machine, given by * BASE**EMAX * ( 1 - EPS ), where BASE is the floating point * value of BETA. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ static float leps, lrmax, lrmin; float a, b, c, half, one, rbase, sixth, small, third, two, zero; static int first=1, iwarn=0, lbeta=0, lemax, lemin, lt=0; int gnmin=0, gpmin=0, i, ieee, lieee1=0, lrnd=0, ngnmin=0, ngpmin=0; /* .. * .. Executable Statements .. */ if( first != 0 ) { first = 0; zero = HPL_rzero; one = HPL_rone; two = HPL_rtwo; /* * lbeta, lt, lrnd, leps, lemin and lrmin are the local values of BETA, * T, RND, EPS, EMIN and RMIN. * * Throughout this routine we use the function HPL_slamc3 to ensure that * relevant values are stored and not held in registers, or are not af- * fected by optimizers. * * HPL_slamc1 returns the parameters lbeta, lt, lrnd and lieee1. */ HPL_slamc1( &lbeta, <, &lrnd, &lieee1 ); /* * Start to find eps. */ b = (float)(lbeta); a = HPL_sipow( b, -lt ); leps = a; /* * Try some tricks to see whether or not this is the correct EPS. */ b = two / 3.0; half = one / HPL_rtwo; sixth = HPL_slamc3( b, -half ); third = HPL_slamc3( sixth, sixth ); b = HPL_slamc3( third, -half ); b = HPL_slamc3( b, sixth ); b = Mabs( b ); if( b < leps ) b = leps; leps = HPL_rone; while( ( leps > b ) && ( b > zero ) ) { leps = b; c = HPL_slamc3( half * leps, HPL_sipow( two, 5 ) * HPL_sipow( leps, 2 ) ); c = HPL_slamc3( half, -c ); b = HPL_slamc3( half, c ); c = HPL_slamc3( half, -b ); b = HPL_slamc3( half, c ); } if( a < leps ) leps = a; /* * Computation of EPS complete. * * Now find EMIN. Let a = + or - 1, and + or - (1 + BASE**(-3)). 
Keep * dividing a by BETA until (gradual) underflow occurs. This is detected * when we cannot recover the previous a. */ rbase = one / (float)(lbeta); small = one; for( i = 0; i < 3; i++ ) small = HPL_slamc3( small * rbase, zero ); a = HPL_slamc3( one, small ); HPL_slamc4( &ngpmin, one, lbeta ); HPL_slamc4( &ngnmin, -one, lbeta ); HPL_slamc4( &gpmin, a, lbeta ); HPL_slamc4( &gnmin, -a, lbeta ); ieee = 0; if( ( ngpmin == ngnmin ) && ( gpmin == gnmin ) ) { if( ngpmin == gpmin ) { /* * Non twos-complement machines, no gradual underflow; e.g., VAX ) */ lemin = ngpmin; } else if( ( gpmin-ngpmin ) == 3 ) { /* * Non twos-complement machines with gradual underflow; e.g., IEEE stan- * dard followers */ lemin = ngpmin - 1 + lt; ieee = 1; } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, gpmin ); iwarn = 1; } } else if( ( ngpmin == gpmin ) && ( ngnmin == gnmin ) ) { if( Mabs( ngpmin-ngnmin ) == 1 ) { /* * Twos-complement machines, no gradual underflow; e.g., CYBER 205 */ lemin = Mmax( ngpmin, ngnmin ); } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); iwarn = 1; } } else if( ( Mabs( ngpmin-ngnmin ) == 1 ) && ( gpmin == gnmin ) ) { if( ( gpmin - Mmin( ngpmin, ngnmin ) ) == 3 ) { /* * Twos-complement machines with gradual underflow; no known machine */ lemin = Mmax( ngpmin, ngnmin ) - 1 + lt; } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); iwarn = 1; } } else { /* * A guess; no known machine */ lemin = Mmin( ngpmin, ngnmin ); lemin = Mmin( lemin, gpmin ); lemin = Mmin( lemin, gnmin ); iwarn = 1; } /* * Comment out this if block if EMIN is ok */ if( iwarn != 0 ) { first = 1; HPL_fprintf( stderr, "\n %s %8d\n%s\n%s\n%s\n", "WARNING. The value EMIN may be incorrect:- EMIN =", lemin, "If, after inspection, the value EMIN looks acceptable, please comment ", "out the if block as marked within the code of routine HPL_slamc2, ", "otherwise supply EMIN explicitly." 
); } /* * Assume IEEE arithmetic if we found denormalised numbers above, or if * arithmetic seems to round in the IEEE style, determined in routine * HPL_slamc1. A true IEEE machine should have both things true; how- * ever, faulty machines may have one or the other. */ if( ( ieee != 0 ) || ( lieee1 != 0 ) ) ieee = 1; else ieee = 0; /* * Compute RMIN by successive division by BETA. We could compute RMIN * as BASE**( EMIN - 1 ), but some machines underflow during this compu- * tation. */ lrmin = HPL_rone; for( i = 0; i < 1 - lemin; i++ ) lrmin = HPL_slamc3( lrmin*rbase, zero ); /* * Finally, call HPL_slamc5 to compute emax and rmax. */ HPL_slamc5( lbeta, lt, lemin, ieee, &lemax, &lrmax ); } *BETA = lbeta; *T = lt; *RND = lrnd; *EPS = leps; *EMIN = lemin; *RMIN = lrmin; *EMAX = lemax; *RMAX = lrmax; } #ifdef HPL_STDC_HEADERS static float HPL_slamc3( const float A, const float B ) #else static float HPL_slamc3( A, B ) /* * .. Scalar Arguments .. */ const float A, B; #endif { /* * Purpose * ======= * * HPL_slamc3 is intended to force a and b to be stored prior to doing * the addition of a and b, for use in situations where optimizers * might hold one of these in a register. * * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamc3.f (version 2.0 -- 1992). * * Arguments * ========= * * A, B (local input) float * The values a and b. * * --------------------------------------------------------------------- */ /* .. * .. Executable Statements .. */ return( A + B ); } #ifdef HPL_STDC_HEADERS static void HPL_slamc4 ( int * EMIN, const float START, const int BASE ) #else static void HPL_slamc4( EMIN, START, BASE ) /* * .. Scalar Arguments .. */ int * EMIN; const int BASE; const float START; #endif { /* * Purpose * ======= * * HPL_slamc4 is a service function for HPL_slamc2. 
* * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamc4.f (version 2.0 -- 1992). * * Arguments * ========= * * EMIN (local output) int * * The minimum exponent before (gradual) underflow, computed by * setting A = START and dividing by BASE until the previous A * can not be recovered. * * START (local input) float * The starting point for determining EMIN. * * BASE (local input) int * The base of the machine. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ float a, b1, b2, c1, c2, d1, d2, one, rbase, zero; int i; /* .. * .. Executable Statements .. */ a = START; one = HPL_rone; rbase = one / (float)(BASE); zero = HPL_rzero; *EMIN = 1; b1 = HPL_slamc3( a * rbase, zero ); c1 = c2 = d1 = d2 = a; do { (*EMIN)--; a = b1; b1 = HPL_slamc3( a / BASE, zero ); c1 = HPL_slamc3( b1 * BASE, zero ); d1 = zero; for( i = 0; i < BASE; i++ ) d1 = d1 + b1; b2 = HPL_slamc3( a * rbase, zero ); c2 = HPL_slamc3( b2 / rbase, zero ); d2 = zero; for( i = 0; i < BASE; i++ ) d2 = d2 + b2; } while( ( c1 == a ) && ( c2 == a ) && ( d1 == a ) && ( d2 == a ) ); } #ifdef HPL_STDC_HEADERS static void HPL_slamc5 ( const int BETA, const int P, const int EMIN, const int IEEE, int * EMAX, float * RMAX ) #else static void HPL_slamc5( BETA, P, EMIN, IEEE, EMAX, RMAX ) /* * .. Scalar Arguments .. */ const int BETA, EMIN, IEEE, P; int * EMAX; float * RMAX; #endif { /* * Purpose * ======= * * HPL_slamc5 attempts to compute RMAX, the largest machine floating- * point number, without overflow. It assumes that EMAX + abs(EMIN) sum * approximately to a power of 2. It will fail on machines where this * assumption does not hold, for example, the Cyber 205 (EMIN = -28625, * EMAX = 28718). It will also fail if the value supplied for EMIN is * too large (i.e. too close to zero), probably with overflow. 
* * Notes * ===== * * This function has been manually translated from the Fortran 77 LAPACK * auxiliary function slamc5.f (version 2.0 -- 1992). * * Arguments * ========= * * BETA (local input) int * The base of floating-point arithmetic. * * P (local input) int * The number of base BETA digits in the mantissa of a floating- * point value. * * EMIN (local input) int * The minimum exponent before (gradual) underflow. * * IEEE (local input) int * A logical flag specifying whether or not the arithmetic sys- * tem is thought to comply with the IEEE standard. * * EMAX (local output) int * * The largest exponent before overflow. * * RMAX (local output) float * * The largest machine floating-point number. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ float oldy=HPL_rzero, recbas, y, z; int exbits=1, expsum, i, lexp=1, nbits, try_, uexp; /* .. * .. Executable Statements .. */ /* * First compute lexp and uexp, two powers of 2 that bound abs(EMIN). * We then assume that EMAX + abs( EMIN ) will sum approximately to the * bound that is closest to abs( EMIN ). (EMAX is the exponent of the * required number RMAX). */ l_10: try_ = (int)( (unsigned int)(lexp) << 1 ); if( try_ <= ( -EMIN ) ) { lexp = try_; exbits++; goto l_10; } if( lexp == -EMIN ) { uexp = lexp; } else { uexp = try_; exbits++; } /* * Now -lexp is less than or equal to EMIN, and -uexp is greater than or * equal to EMIN. exbits is the number of bits needed to store the expo- * nent. */ if( ( uexp+EMIN ) > ( -lexp-EMIN ) ) { expsum = (int)( (unsigned int)(lexp) << 1 ); } else { expsum = (int)( (unsigned int)(uexp) << 1 ); } /* * expsum is the exponent range, approximately equal to EMAX - EMIN + 1. */ *EMAX = expsum + EMIN - 1; /* * nbits is the total number of bits needed to store a floating-point * number. 
*/ nbits = 1 + exbits + P; if( ( nbits % 2 == 1 ) && ( BETA == 2 ) ) { /* * Either there are an odd number of bits used to store a floating-point * number, which is unlikely, or some bits are not used in the represen- * tation of numbers, which is possible, (e.g. Cray machines) or the * mantissa has an implicit bit, (e.g. IEEE machines, Dec Vax machines), * which is perhaps the most likely. We have to assume the last alterna- * tive. If this is true, then we need to reduce EMAX by one because * there must be some way of representing zero in an implicit-bit sys- * tem. On machines like Cray we are reducing EMAX by one unnecessarily. */ (*EMAX)--; } if( IEEE != 0 ) { /* * Assume we are on an IEEE machine which reserves one exponent for in- * finity and NaN. */ (*EMAX)--; } /* * Now create RMAX, the largest machine number, which should be equal to * (1.0 - BETA**(-P)) * BETA**EMAX . First compute 1.0-BETA**(-P), being * careful that the result is less than 1.0. */ recbas = HPL_rone / (float)(BETA); z = (float)(BETA) - HPL_rone; y = HPL_rzero; for( i = 0; i < P; i++ ) { z *= recbas; if( y < HPL_rone ) oldy = y; y = HPL_slamc3( y, z ); } if( y >= HPL_rone ) y = oldy; /* * Now multiply by BETA**EMAX to get RMAX. */ for( i = 0; i < *EMAX; i++ ) y = HPL_slamc3( y * BETA, HPL_rzero ); *RMAX = y; /* * End of HPL_slamch */ } #ifdef HPL_STDC_HEADERS static float HPL_sipow ( const float X, const int N ) #else static float HPL_sipow( X, N ) /* * .. Scalar Arguments .. */ const int N; const float X; #endif { /* * Purpose * ======= * * HPL_sipow computes the integer n-th power of a real scalar x. * * Arguments * ========= * * X (local input) const float * The real scalar x. * * N (local input) const int * The integer power to raise x to. * * --------------------------------------------------------------------- */ /* * .. Local Variables .. */ float r, y=HPL_rone; int k, n; /* .. * .. Executable Statements .. 
*/ if( X == HPL_rzero ) return( HPL_rzero ); if( N < 0 ) { n = -N; r = HPL_rone / X; } else { n = N; r = X; } for( k = 0; k < n; k++ ) y *= r; return( y ); } hpcc-1.4.1/src/bench_lat_bw_1.5.2.c0000644000000000000000000015573611256503660013433 00000000000000/* * Bandwidth-Latency-Benchmark * * Authors: Rolf Rabenseifner * Gerrit Schulz * Michael Speck * * Copyright (c) 2003 HLRS, University of Stuttgart * */ /* ----------------------------------------------------------------------- * * This Bandwidth-Latency-Benchmark measures three types of latency and * bandwidth: * * - Maximal latency and minimal bandwidth over a set of independently * running ping-pong benchmarks. If there is enough benchmarking time, * then each process makes a ping-pong benchmark with each other * process, otherwise only a subset of process-pairs is used. * (Additionally, minimal and average latency and maximal and average * bandwidth is also reported.) * * - Bandwidth per process (and latency) of a ring pattern, i.e., * each process sends in parallel a message to its neighbor in a ring. * The ring is build by the sequence of the ranks in MPI_COMM_WORLD * (naturally ordered ring). * * - Bandwidth and latency of 10 (or 30) different and randomly ordered rings. * * The major results are: * * - maximal ping pong latency, * - average latency of parallel communication in randomly ordered rings, * - minimal ping pong bandwidth, * - bandwidth per process in the naturally ordered ring, * - average bandwidth per process in randomly ordered rings. * * These five numbers characterize the strength or weakness of a network. * For example, the ratio ping pong bandwidth : naturally-ordered-ring * bandwidth : random-ring bandwidth, may be * - on a torus network (e.g. 
T3E): 1 : 1/2 : 1/5
 *   - on a bus connecting n CPUs: 1 : 1/n : 1/n
 *   - on a shared memory vector sys.: 1 : 1/2 : 1/2
 *   - on a full cross-bar: 1 : 1 : 1 (1 MPI process per node)
 *
 * The set of ping-pong measurements is based on an idea of Jack Dongarra
 * communicated with the authors at the EuroPVM/MPI 2003 conference.
 * The ring benchmark is based on the ideas of the effective bandwidth
 * benchmark ( www.hlrs.de/mpi/b_eff ).
 *
 * All measurements are done by repeating the communication pattern
 * several times (see arguments "number_of_measurements") and using
 * the minimal execution time.
 * Each pattern (ping pong, naturally, or randomly ordered ring) is
 * benchmarked by repeating the pattern in a loop (see argument
 * "loop_length") and starting the time measurement (with MPI_Wtime)
 * after a first additional and non-measured iteration and ending
 * the time measurement after the end of this loop.
 * All latency measurements are done with 8 byte messages, and bandwidth
 * measurements with 2,000,000 bytes.
 * Ping pong benchmarking is done with MPI standard send and receive,
 * the ring patterns are communicated in both directions using the
 * best result of two implementations: (a) with two calls to MPI_Sendrecv
 * and (b) with two non-blocking receives and two non-blocking sends
 * (to allow duplex usage of the network links).
 *
 * The benchmarking routine bench_lat_bw() has 2 input arguments:
 *
 *  - The maximal execution time (in seconds) that should be used for
 *    measuring the latency of all ping pong pairs (e.g. on a T3E,
 *    (, and
 *  - the maximal time that should be used for the ping pong bandwidth.
 *
 * Additionally, the benchmark needs 4 GB / random-ring-bandwidth,
 * e.g., 400 sec (40 sec) if the bandwidth is 10 MB/s (100 MB/s),
 * and additionally 3000 * random-ring-latency, e.g., 30 sec (3 sec)
 * if the latency is 10 msec (1 msec).
 *
 * All arguments are in sec or in byte/sec.
The printing routine * reports on stdout all latency values in milli sec (msec) and * all bandwidth values in MB/s (with 1 MB/s = 10**6 byte/sec) * * ----------------------------------------------------------------------- * * Typical output on a Cray T3E: * ----------------------------- * * ------------------------------------------------------------------ * Latency-Bandwidth-Benchmark R1.5 (c) HLRS, University of Stuttgart * * Major Benchmark results: * ------------------------ * * Max Ping Pong Latency: 0.005209 msecs * Randomly Ordered Ring Latency: 0.007956 msecs * Min Ping Pong Bandwidth: 314.025708 MB/s * Naturally Ordered Ring Bandwidth: 147.600097 MB/s * Randomly Ordered Ring Bandwidth: 61.096556 MB/s * * ------------------------------------------------------------------ * * Detailed benchmark results: * Ping Pong: * Latency min / avg / max: 0.004268 / 0.004588 / 0.005209 msecs * Bandwidth min / avg / max: 314.026 / 318.653 / 324.822 MByte/s * Ring: * On naturally ordered ring: latency= 0.008512 msec, bandwidth= 147.600097 MB/s * On randomly ordered ring: latency= 0.007956 msec, bandwidth= 61.096556 MB/s * * ------------------------------------------------------------------ * * Benchmark conditions: * The latency measurements were done with 8 bytes * The bandwidth measurements were done with 4000000 bytes * The ring communication was done in both directions on 64 processes * The Ping Pong measurements were done on * - 4032 pairs of processes for latency benchmarking, and * - 462 pairs of processes for bandwidth benchmarking, * out of 64*(64-1) = 4032 possible combinations on 64 processes. 
* (1 MB/s = 10**6 byte/sec) * * ------------------------------------------------------------------ * * Typical output on a NEC SX-5 (shared memory) * -------------------------------------------- * * ------------------------------------------------------------------ * Latency-Bandwidth-Benchmark R1.5 (c) HLRS, University of Stuttgart * * Major Benchmark results: * ------------------------ * * Max Ping Pong Latency: 0.005688 msecs * Randomly Ordered Ring Latency: 0.007819 msecs * Min Ping Pong Bandwidth: 7875.941147 MB/s * Naturally Ordered Ring Bandwidth: 4182.560664 MB/s * Randomly Ordered Ring Bandwidth: 4393.213906 MB/s * * ------------------------------------------------------------------ * * Detailed benchmark results: * Ping Pong: * Latency min / avg / max: 0.005595 / 0.005629 / 0.005688 msecs * Bandwidth min / avg / max: 7875.941 / 7912.086 / 7928.861 MByte/s * Ring: * On naturally ordered ring: latency= 0.009812 msec, bandwidth= 4182.560664 MB/s * On randomly ordered ring: latency= 0.007819 msec, bandwidth= 4393.213906 MB/s * * ------------------------------------------------------------------ * * Benchmark conditions: * The latency measurements were done with 8 bytes * The bandwidth measurements were done with 4000000 bytes * The ring communication was done in both directions on 6 processes * The Ping Pong measurements were done on * - 30 pairs of processes for latency benchmarking, and * - 30 pairs of processes for bandwidth benchmarking, * out of 6*(6-1) = 30 possible combinations on 6 processes. 
* (1 MB/s = 10**6 byte/sec) * * ------------------------------------------------------------------ * * * ----------------------------------------------------------------------- * * Updates * - from 1.5.1.0 to 1.5.1.1: additional arguments in params * - bench_lat_bw_1.5.1.1.c = used as hpcc0.6beta/src/bench_lat_bw_1.5.1.c * - from 1.5.1.1 to 1.5.1.2: only pretty print, without any TAB * - from 1.5.1.2 to 1.5.2 fixed bug in usage of loop_length_proposal and loop_length * * ----------------------------------------------------------------------- */ #include /* global vars */ FILE *OutFile; double wtick; #define WTICK_FACTOR 10 /* Message Tags */ #define PING 100 #define PONG 101 #define NEXT_CLIENT 102 #define TO_RIGHT 200 #define TO_LEFT 201 #ifndef CHECK_LEVEL # define CHECK_LEVEL 1 #endif #ifndef DEBUG_LEVEL # define DEBUG_LEVEL 2 #endif typedef struct { int msglen; double ring_lat; double ring_bwidth; double rand_lat; double rand_bwidth; } BenchmarkResult; /* measurement results, used only on rank 0 */

/* User-defined MPI reduction function: element-wise sum of long long values.
 *
 * invec     - incoming operand vector, read as *len long long values
 * inoutvec  - accumulator vector, also *len long long values; every element
 *             is incremented by the matching element of invec
 * len       - number of elements in both vectors
 * datatype  - not used
 *
 * It is handed to MPI_Op_create() in cross_ping_pong_set() to add up the
 * per-process result counts.
 */
static void
SumLongLong(void *invec, void *inoutvec, int *len, MPI_Datatype *datatype) {
  const long long *src = (const long long *)invec;
  long long *dst = (long long *)inoutvec;
  int remaining = *len;

  while (remaining != 0) {
    *dst += *src;
    dst++;
    src++;
    remaining--;
  }
}

/* ----------------------------------------------------------------------- * Routine: cross_ping_pong_set() * * Task: PingPong benchmark * to compute minimum, maximum and average latency and bandwidth * over the connections on all (some) pairs of processes. 
* * Input: * client_rank_low, client_rank_high, client_rank_stride, * server_rank_low, server_rank_high, server_rank_stride, * flag -1 = only client_rank < server_rank * 0 = only client_rank != server_rank * +1 = only client_rank > server_rank * msg_length * loop_length * number_of_measurements * * Output: * latency_min, latency_avg, latency_max * bandwidth_min, bandwidth_avg, bandwidth_max (if msg_length > 0) * (min, max, avg are done over all pairs of processes) * (after minimum of the latency over all measurements of same pair) * * Task: * * Overview: * - initialization: * Client/Server_rank_low, .._rank_high and .._rank_stride define a set * of client ranks and a set of server ranks. * Client_rank_high is lowered and server_rank_low is enlarged * if a multiple of the strides does not fit. * - execution of the ping-pong benchmarks: * Between each pair of client and server rank (out of the sets) * a ping pong benchmark with a fixed message length is done. * All ping_pong benchmarks are serialized, i.e., never two * process pairs are benchmarked at the same time. * To achieve a minimum of disturbance, each process not involved in * a ping pong must be in the status of waiting for a message. * This principle is fulfilled by sending additional token messages * from a previous client process to the next client process. * - evaluation: * All benchmark results must be stored locally before a total * evaluation can be done, because in the execution sequence, * the iteration over the number_of_measurements is the outer-most * loop, while in the evaluation this loop is the inner-most. 
* * Execution sequence of the ping pong benchmarks: * for (i_meas=0; i_meas < number_of_measurements; i_meas++) * { * for (client_rank=client_rank_low; client_rank <= client_rank_high; client_rank += client_rank_stride) * { * // the following message receives a token indicating the right to send messages to server processes * if ((myrank == client_rank) && (client_rank > client_rank_low)) * MPI_Recv( >>>.... from client_rank-client_rank_stride ); * for (server_rank=server_rank_low; server_rank <= server_rank_high; server_rank += server_rank_stride) * { * if ( (flag<0 ? client_rank < server_rank : * (flag>0 ? client_rank > server_rank : client_rank != server_rank ) ) ) * { * PingPongLoop(...); * } * } * // the following message sends a token indicating the right to send messages to server processes * if ((myrank == client_rank) && (client_rank < client_rank_high)) * MPI_Send( >>>.... to client_rank+client_rank_stride ); * MPI_Bcast( >>> ... root=client_rank_high ); * } * } * * with PingPongLoop(...) * { * if (myrank == client_rank) * { * for (i_loop=-1; i_loop < loop_length; i_loop++) * { * if (i_loop==0) start_time=MPI_Wtime(); * >>> send ping from client_rank to server_rank * >>> recv pong from server_rank * } * end_time=MPI_Wtime(); * lat_one_meas = end_time-start_time; * bw_one_meas = message_length/lat_one_meas; * >>> store measurement results in the list * } * if (myrank == server_rank) * { * for (i_loop=-1; i_loop < loop_length; i_loop++) * { * >>> recv ping from client_rank * >>> send pong from server_rank to client_rank * } * } * } * * * Evaluation sequence: * latency_min/avg/max * = min/avg/max over all process pairs * of (min over all measurements of lat_one_meas) * bandwidth_min/avg/max * = min/avg/max over all process pairs * of (msg_length / (min over all measurements of lat_one_meas)) * * Caution: Execution and evaluation sequence are different. 
* Therefore, each client has to store all measurement results * for all pairs locally, before it can calculate the evaluation * sequence (after all ping-pongs were done). * * Remarks: * - With using the tokens, there is never a message outstanding * that i not part of a currently running PingPong. * - Processes not involved in a current PingPong are waiting * for the first ping message (if their next role is to be a server process) * or for the token (if their next role is to be a client process). * - At the beginning, a barrier is called * - At the end, the last client process initiates a Bcast. * * Communication scheme: * * Example with client and server rank_low=0, rank_high=11, and rank_stride=3 * and flag=0 * * Rank: 0 1 2 3 4 5 6 7 8 9 10 11 * * Role: C C C C * S S S S * * Protcol: ------------------------BARRIER------------------------- * C==<======S * C==<=====================S * C==<====================================S * C==<===================================================S * s------------->r * S=>==C * C==<======S * C==<=====================S * C==<====================================S * s------------->r * S================>==C * S=>==C * C==<======S * C==<=====================S * s------------->r * S===============================>==C * S================>==C * S=>==C * C===<=====S * --------------------------------------------Bcast------- * * With * --BARRIER-- MPI_Barrier(MPI_COMM_WORLD) * C==<======S Client Server Ping Pong Loop, with client_rank < server_rank, * executed only if (flag <= 0) * S======>==C Client Server Ping Pong Loop, with client_rank > server_rank, * executed only if (flag >= 0) * -----Bcast- MPI_Bcast(MPI_COMM_WORLD) with root = last client rank * * ----------------------------------------------------------------------- */ static void cross_ping_pong_set( int client_rank_low, int client_rank_high, int client_rank_stride, int server_rank_low, int server_rank_high, int server_rank_stride, int msg_length, int loop_length, int 
number_of_measurements, int flag, double *latency_min, double *latency_avg, double *latency_max, double *bandwidth_min, double *bandwidth_avg, double *bandwidth_max, long long *total_number_of_pairs) { MPI_Status status; int client_rank, server_rank; int i_meas; int i_loop, i; unsigned char *sndbuf, *rcvbuf; double end_time, start_time, lat_one_meas; double *local_results; double lat, bw; int result_index; long long number_of_results; int size, myrank; double loc_latency_min; double loc_latency_avg; double loc_latency_max; double loc_bandwidth_min; double loc_bandwidth_avg; double loc_bandwidth_max; MPI_Op sumll; int meas_ok; #if (CHECK_LEVEL >= 1) register int base; #endif /* get number of processors and own rank */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* check the benchmark parameter */ if (client_rank_low < 0) client_rank_low = 0; if (client_rank_high >= size) client_rank_high = size-1; client_rank_high = (client_rank_high-client_rank_low) / client_rank_stride*client_rank_stride + client_rank_low; if (server_rank_low < 0) server_rank_low = 0; if (server_rank_high >= size) server_rank_high = size-1; server_rank_low = server_rank_high - (server_rank_high-server_rank_low)/server_rank_stride*server_rank_stride; local_results = (double *) malloc( ((server_rank_high - server_rank_low)/server_rank_stride+1) * number_of_measurements * sizeof(double) ); /* set the initial result index*/ result_index = 0; /* get memory for the send/recv buffer */ sndbuf = (unsigned char *) malloc (msg_length); rcvbuf = (unsigned char *) malloc (msg_length); number_of_results = 0; /* do the measurements */ for (i_meas=0; i_meas < number_of_measurements; i_meas++) { result_index = 0; for (client_rank=client_rank_low; client_rank <= client_rank_high; client_rank += client_rank_stride) { /* the following message receives a token indicating the right to send * messages to server processes */ if ((myrank == client_rank) && (client_rank > 
client_rank_low)) MPI_Recv (rcvbuf, 0, MPI_BYTE, client_rank - client_rank_stride, NEXT_CLIENT, MPI_COMM_WORLD, &status); /* measurement loop */ for (server_rank = server_rank_low; server_rank <= server_rank_high; server_rank += server_rank_stride) { if (((flag <= 0) && (server_rank > client_rank)) || ((flag >= 0) && (server_rank < client_rank))) { if (server_rank==client_rank) fprintf( OutFile, "ALARM\n"); if (myrank == client_rank) { do { meas_ok = 0; /* communicate loop_length to server_rank */ MPI_Send (&loop_length, 1, MPI_INT, server_rank, PING, MPI_COMM_WORLD); for (i_loop = -1; i_loop < loop_length; i_loop++) { if (i_loop == 0) start_time = MPI_Wtime (); /* send ping from client_rank to server_rank */ #if (CHECK_LEVEL >= 1) base = (i_loop + myrank + 1)&0x7f; /* = mod 128 */ sndbuf[0] = base; sndbuf[msg_length-1] = base+1; # if (CHECK_LEVEL >= 2) /* check the check: use a wrong value on process 1 */ if (myrank == 1) sndbuf[0] = sndbuf[0] + 11; # endif #endif MPI_Send (sndbuf, msg_length, MPI_BYTE, server_rank, PING, MPI_COMM_WORLD); /* recv pong from server_rank */ MPI_Recv (rcvbuf, msg_length, MPI_BYTE, server_rank, PONG, MPI_COMM_WORLD, &status); #if (CHECK_LEVEL >= 1) /* check returned values must be +13 of origin */ if (rcvbuf[0] != base+13 || rcvbuf[msg_length-1] != base + 14 ) { fprintf( OutFile, "[%d]: ERROR: expected %u and %u as first and last byte, but got %u and %u instead\n", myrank, base+13, base+14, rcvbuf[0], rcvbuf[msg_length-1] ); fflush( OutFile ); } #endif } end_time = MPI_Wtime (); lat_one_meas = end_time - start_time; if (lat_one_meas < WTICK_FACTOR * wtick) { if (loop_length == 1) loop_length = 2; else loop_length = loop_length * 1.5; } else meas_ok = 1; MPI_Send (&meas_ok, 1, MPI_INT, server_rank, PING, MPI_COMM_WORLD); } while (!meas_ok); /* fprintf ( OutFile, "CrossPingPong: Client = %d, Server = %d, " "Latency = %f us \n", client_rank, server_rank, (lat_one_meas * 1e6) / (2 * loop_length)); */ fflush (OutFile); /* workaround to fix 
problems with MPI_Wtime granularity */ if (!lat_one_meas) { static int complain = 0; lat_one_meas = wtick; if (complain != loop_length) { #define MSG "In " __FILE__ ", routine bench_lat_bw, the 3rd parameter to cross_ping_pong_controlled was %d; increase it.\n" fprintf (stderr, MSG, loop_length); fprintf (OutFile, MSG, loop_length); #undef MSG } complain = loop_length; } /* store measurement results in the list */ local_results [i_meas*number_of_results + result_index] = lat_one_meas / (loop_length*2); result_index++; } if (myrank == server_rank) { do { meas_ok = 0; /* recv the loop_length from client_rank */ MPI_Recv (&loop_length, 1, MPI_INT, client_rank, PING, MPI_COMM_WORLD, &status); for (i_loop = -1; i_loop < loop_length; i_loop++) { /* recv ping from client_rank */ MPI_Recv (rcvbuf, msg_length, MPI_BYTE, client_rank, PING, MPI_COMM_WORLD, &status); #if (CHECK_LEVEL >= 1) /* server returns received value + const */ sndbuf[0] = rcvbuf[0] + 13; sndbuf[msg_length-1] = rcvbuf[msg_length-1] + 13; # if (CHECK_LEVEL >= 2) /* check the check: use a wrong value on process 1 */ if (myrank == 1) sndbuf[msg_length-1] = sndbuf[msg_length-1] + 22; # endif #endif /* send pong from server_rank to client_rank */ MPI_Send (sndbuf, msg_length, MPI_BYTE, client_rank, PONG, MPI_COMM_WORLD); } MPI_Recv (&meas_ok, 1, MPI_INT, client_rank, PING, MPI_COMM_WORLD, &status); } while(!meas_ok); } } } /* the following message sends a token indicating the right to send * messages to server processes */ if ((myrank == client_rank) && (client_rank < client_rank_high)) MPI_Send (sndbuf, 0, MPI_BYTE, client_rank + client_rank_stride, NEXT_CLIENT, MPI_COMM_WORLD); MPI_Bcast (sndbuf, 0, MPI_BYTE, client_rank_high, MPI_COMM_WORLD); } number_of_results = result_index; } /* free the send/recv buffer */ free (sndbuf); free (rcvbuf); /* compute local min, max and avg on all client processes */ /* gather minimal latency for all indexes in first measurement of all measurements */ for ( i = 0; i < 
number_of_results; i++ ) for (i_meas = 1; i_meas < number_of_measurements; i_meas++) if ( local_results[i_meas*number_of_results+i] < local_results[i] ) local_results[i] = local_results[i_meas*number_of_results+i]; loc_latency_min = 1e99; loc_latency_avg = 0; loc_latency_max = 0; loc_bandwidth_min = 1e99; loc_bandwidth_avg = 0; loc_bandwidth_max = 0; for (i=0; i < number_of_results; i++) { lat = local_results[i]; bw = msg_length / lat; #if (DEBUG_LEVEL >= 3) if ((myrank == 0) || (DEBUG_LEVEL >= 4)) { fprintf ( OutFile, "[%d] i=%d, lat=%10.6fms, bw=%10.6fMB/s\n", myrank, i, lat*1e3, bw/1e6); fflush( OutFile ); } #endif if (lat < (loc_latency_min)) loc_latency_min = lat; loc_latency_avg = loc_latency_avg + lat; if (lat > (loc_latency_max)) loc_latency_max = lat; if (bw < (loc_bandwidth_min)) loc_bandwidth_min = bw; loc_bandwidth_avg = loc_bandwidth_avg + bw; if (bw > (loc_bandwidth_max)) loc_bandwidth_max = bw; } #if (DEBUG_LEVEL >= 3) if ((myrank == 0) || (DEBUG_LEVEL >= 4)) { fprintf ( OutFile, "[%d] Latency min / avg / max: %10.6f / %10.6f / %10.6f msecs\n", myrank, loc_latency_min * 1e3, loc_latency_avg / number_of_results * 1e3, loc_latency_max * 1e3); fflush( OutFile ); fprintf ( OutFile, "[%d] Bandwidth min / avg / max: %10.3f / %10.3f / %10.3f MByte/s\n\n", myrank, loc_bandwidth_min / 1e6, loc_bandwidth_avg / number_of_results / 1e6, loc_bandwidth_max / 1e6); fflush( OutFile ); } #endif /* free the local result list */ free (local_results); /* send all local results to process 0 */ MPI_Op_create( SumLongLong, 1, &sumll ); MPI_Reduce (&number_of_results, total_number_of_pairs, 1, MPI_LONG_LONG_INT, sumll, 0, MPI_COMM_WORLD); MPI_Op_free( &sumll ); MPI_Reduce (&loc_latency_min, latency_min, 1, MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce (&loc_latency_avg, latency_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce (&loc_latency_max, latency_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); MPI_Reduce (&loc_bandwidth_min, bandwidth_min, 1, 
MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD); MPI_Reduce (&loc_bandwidth_avg, bandwidth_avg, 1, MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD); MPI_Reduce (&loc_bandwidth_max, bandwidth_max, 1, MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD); /* compute global average on process 0 */ if ((myrank == 0) && (*total_number_of_pairs > 0)) { *latency_avg= *latency_avg / (*total_number_of_pairs); *bandwidth_avg= *bandwidth_avg / (*total_number_of_pairs); } #if (DEBUG_LEVEL >= 2) /* print the results */ if (myrank == 0) { fprintf ( OutFile, "Message Length: %d\n", msg_length); fprintf ( OutFile, "Latency min / avg / max: %10.6f / %10.6f / %10.6f msecs\n", *latency_min * 1e3, *latency_avg * 1e3, *latency_max * 1e3); fprintf ( OutFile, "Bandwidth min / avg / max: %10.3f / %10.3f / %10.3f MByte/s\n\n", *bandwidth_min / 1e6, *bandwidth_avg / 1e6, *bandwidth_max / 1e6); fflush( OutFile ); } #endif } /* ----------------------------------------------------------------------- * Routine: cross_ping_pong_controlled() * * Task: Choose a set of input arguments for PingPongSet * to benchmark the minimal/average/maximal latency and * bandwidth of a system based on a given amount of time. 
* * Input: * max_time, msg_length, loop_length, number_of_measurements * * Output: * latency_min, latency_avg, latency_max * bandwidth_min, bandwidth_avg, bandwidth_max * (min, max, avg are done over all pairs of processes) * * Execution task: * - benchmarking latency and bandwidth for msg_length byte * of communication with client_rank=0 and server_rank=size-1 * - calculating client and server rank_stride to guarantee, that * - PingPongSet does not need more than max_time sec * ----------------------------------------------------------------------- */
/* Implementation notes for cross_ping_pong_controlled():
 *  1. A probe run between rank 0 and the last rank delivers the average
 *     latency of one message (pair_latency).
 *  2. Rank 0 turns max_time into a budget of process pairs, takes the
 *     square root as the number of clients (= number of servers) and derives
 *     the rank stride from it.  Strides above 3 are increased until they
 *     have no divisor other than 1 and themselves or exceed size/3.
 *  3. The stride is broadcast from rank 0 and the full benchmark is run
 *     with it; the results go straight into the caller's output arguments.
 */
static void
cross_ping_pong_controlled(
  double max_time,
  int msg_length,
  int loop_length,
  int number_of_measurements,
  double *latency_min,
  double *latency_avg,
  double *latency_max,
  double *bandwidth_min,
  double *bandwidth_avg,
  double *bandwidth_max,
  long long *number_of_pairs )
{
  int nprocs, me;
  int rank_stride;       /* computed on rank 0, valid everywhere after the Bcast */
  int pings_per_side;    /* number of clients = number of servers */
  long long pair_budget; /* how many pairs fit into max_time */
  double pair_latency;   /* average latency reported by the probe run */
  /* probe outputs that are not looked at afterwards */
  double ignored_lat_min, ignored_lat_max;
  double ignored_bw_min, ignored_bw_avg, ignored_bw_max;
  long long ignored_pairs;

  MPI_Comm_size(MPI_COMM_WORLD, &nprocs);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);

  /* probe run: client rank 0, server rank nprocs-1 */
  cross_ping_pong_set( 0, 0, 1,
                       nprocs-1, nprocs-1, 1,
                       msg_length, loop_length, number_of_measurements, 0,
                       &ignored_lat_min, &pair_latency, &ignored_lat_max,
                       &ignored_bw_min, &ignored_bw_avg, &ignored_bw_max,
                       &ignored_pairs );

  if ( me == 0 )
  {
    if ( !(pair_latency*2*(loop_length+1) >= WTICK_FACTOR*wtick) )
    {
      /* one measurement is shorter than the timer can resolve:
       * base the estimate on the timer granularity instead */
      pair_budget = max_time / (WTICK_FACTOR*wtick*number_of_measurements);
      fprintf( OutFile, "Use MPI_Wtick for estimation of max pairs\n");
      fflush( OutFile );
    }
    else
    {
      pair_budget = max_time / (pair_latency*2*(loop_length+1)*number_of_measurements);
      fprintf( OutFile, "MPI_Wtime granularity is ok.\n");
    }

    pings_per_side = (int)sqrt( (double)pair_budget );
    if ( pings_per_side < 5 )
      pings_per_side = 5;

    rank_stride = (int)(1.0 * nprocs / pings_per_side + 0.9);
    if ( rank_stride < 1 )
      rank_stride = 1;
    else if ( rank_stride == 2 )
      rank_stride = 3;

    if ( rank_stride > 3 )
    {
      for (;;)
      {
        int divisor;
        int composite = 0;

        for ( divisor = 2; divisor < rank_stride; divisor++ )
        {
          if ( (rank_stride % divisor) == 0 )
          {
            composite = 1;
            break;
          }
        }
        /* stop at the first stride without divisor, or give up searching
         * once the stride is larger than a third of the process count */
        if ( !composite || rank_stride > (nprocs/3) )
          break;
        rank_stride++;
      }
    }

#if (DEBUG_LEVEL >= 2)
    fprintf( OutFile, "message size: %10d\n", msg_length );
    fprintf( OutFile, "max time : %10.6f secs\n", max_time );
    fprintf( OutFile, "latency for msg: %10.6f msecs\n", pair_latency*1e3 );
    fprintf( OutFile, "estimation for ping pong: %10.6f msecs\n", pair_latency*2*(loop_length+1)*number_of_measurements*1e3);
    fprintf( OutFile, "max number of ping pong pairs = %10.0f\n", 1.0*pair_budget );
    fprintf( OutFile, "max client pings = max server pongs = %10d\n", pings_per_side );
    fprintf( OutFile, "stride for latency = %10d\n", rank_stride );
    fflush( OutFile );
#endif
  }

  /* every rank needs the stride chosen by rank 0 */
  MPI_Bcast ( &rank_stride, 1, MPI_INT, 0, MPI_COMM_WORLD);

  cross_ping_pong_set( 0, nprocs-1, rank_stride,
                       0, nprocs-1, rank_stride,
                       msg_length, loop_length, number_of_measurements, 0,
                       latency_min, latency_avg, latency_max,
                       bandwidth_min, bandwidth_avg, bandwidth_max,
                       number_of_pairs);
}

/* ----------------------------------------------------------------------- * Routine: ring_lat_bw_loop() * * * Task: Communicate to left and right partner in rand_pattern_count * random rings and the naturally ordered ring. Reduce the maximum * of all measurements over all processors to rank 0 and get the * minimal measurement on it. Compute naturally ordered and avg * randomly ordered latency and bandwidth. * * Input: * msglen, measurements, loop_length, rand_pattern_count * * Output: * result->msglen, result->ring_lat, result->rand_lat, * result->ring_bwidth, result->rand_bwidth * * Execution Tasks: * * - loop loop_length * measurements times and do Irecv,Isend to left * and right partner as well as Sendrecv and save the minimum of both * latencies for all rings. * - Reduce all measurements*(rand_pattern_count+1) latencies to rank 0 * and get minimal measurement on it. * - Compute latencies and bandwidth. For random order the geometric average * of the latency is built. 
* ----------------------------------------------------------------------- */
/* Parameter summary for ring_lat_bw_loop():
 *   msglen               - message size in bytes used for every transfer
 *   measurements         - number of repetitions; the minimum over them is kept
 *   loop_length_proposal - starting iteration count of the timing loop; it is
 *                          enlarged while a run takes less than
 *                          WTICK_FACTOR * wtick
 *   rand_pattern_count   - number of random rings measured in addition to
 *                          pattern 0 (whose values are reported as ring_lat /
 *                          ring_bwidth)
 *   result               - filled in on rank 0 only
 * For every pattern the smaller one of the MPI_Sendrecv latency and the
 * MPI_Irecv/MPI_Isend latency is recorded.
 * NOTE(review): in this copy of the file part of the function text is
 * missing (the construction of the ring pattern and the start of the
 * MPI_Sendrecv timing loop); the surviving text is kept unchanged.
 */
static void ring_lat_bw_loop( int msglen, int measurements, int loop_length_proposal, int rand_pattern_count, BenchmarkResult *result ) { int i_meas, i_pat, i_loop, i, j; double start_time, end_time, lat_sendrecv, lat_nonblocking; double *latencies; /* measurements * (rand_pattern_count+1) */ double *max_latencies; /* reduced from all processors with MPI_MAX on rank 0 */ double avg_latency; /* of random pattern rings */ int *ranks; /* communication pattern, order of processors */ int size, myrank, left_rank, right_rank; MPI_Request requests[4]; MPI_Status statuses[4]; unsigned char *sndbuf_left, *sndbuf_right, *rcvbuf_left, *rcvbuf_right; long seedval; double rcp = 1.0 / RAND_MAX; int loop_length; int meas_ok, meas_ok_recv; #if (CHECK_LEVEL >= 1) register int base; #endif /* get number of processors and own rank */ MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); /* alloc memory and init with 0 */ latencies = (double *)malloc( measurements * (rand_pattern_count+1) * sizeof( *latencies ) ); max_latencies = (double *)malloc( measurements * (rand_pattern_count+1) * sizeof( *max_latencies ) ); ranks = (int *)malloc( size * sizeof( *ranks ) ); sndbuf_left = (unsigned char *)malloc( msglen ); sndbuf_right = (unsigned char *)malloc( msglen ); rcvbuf_left = (unsigned char *)malloc( msglen ); rcvbuf_right = (unsigned char *)malloc( msglen ); /* init pseudo-random with time seed */ seedval=(long)(time((time_t *) 0)); #if (DEBUG_LEVEL >= 3) if (myrank==0) { fprintf( OutFile, "seedval = %ld\n",seedval); fflush( OutFile ); } #endif /* benchmark */ for ( i_meas = 0; i_meas < measurements; i_meas++ ) { srand(seedval); for ( i_pat = 0; i_pat < rand_pattern_count+1; i_pat++ ) { /* build pattern at rank 0 and broadcast to all */ if ( myrank == 0 ) { if (i_pat>0) { /* random pattern */ for (i=0; i= 3) if ( i_meas == 0 ) { fprintf( OutFile, "i_pat=%3d: ",i_pat); for (i=0; i= 1) 
base = (i_loop + myrank + 1)&0x7f; /* = mod 128 */ sndbuf_right[0] = base; sndbuf_right[msglen-1] = base+1; sndbuf_left[0] = base+2; sndbuf_left[msglen-1] = base+3; # if (CHECK_LEVEL >= 2) /* check the check: use a wrong value on process 1 */ if (myrank == 1) sndbuf_right[0] = sndbuf_right[0] + 33; if (myrank == 1) sndbuf_left[msglen-1] = sndbuf_left[msglen-1] + 44; # endif #endif MPI_Sendrecv( sndbuf_right, msglen, MPI_BYTE, right_rank, TO_RIGHT, rcvbuf_left, msglen, MPI_BYTE, left_rank, TO_RIGHT, MPI_COMM_WORLD, &(statuses[0]) ); MPI_Sendrecv( sndbuf_left, msglen, MPI_BYTE, left_rank, TO_LEFT, rcvbuf_right, msglen, MPI_BYTE, right_rank, TO_LEFT, MPI_COMM_WORLD, &(statuses[1]) ); #if (CHECK_LEVEL >= 1) /* check whether bytes are received correctly */ /* rcvbuf_left holds the message received from left_rank */ base = (i_loop + left_rank + 1)&0x7f; /* = mod 128 */ if ( rcvbuf_left[0] != base || rcvbuf_left[msglen-1] != base+1 ) { fprintf( OutFile, "[%d]: ERROR: from left: expected %u and %u as first and last byte, but got %u and %u instead\n", myrank, base, base+1, rcvbuf_left[0], rcvbuf_left[msglen-1] ); fflush( OutFile ); } /* rcvbuf_right holds the message received from right_rank */ base = (i_loop + right_rank + 1)&0x7f; /* = mod 128 */ if ( rcvbuf_right[0] != base+2 || rcvbuf_right[msglen-1] != base + 3 ) { fprintf( OutFile, "[%d]: ERROR: from right: expected %u and %u as first and last byte, but got %u and %u instead\n", myrank, base+2, base+3, rcvbuf_right[0], rcvbuf_right[msglen-1] ); fflush( OutFile ); } #endif } end_time = MPI_Wtime(); if ((end_time-start_time) < WTICK_FACTOR * wtick) { if (loop_length_proposal == 1) loop_length_proposal = 2; else loop_length_proposal = loop_length_proposal * 1.5; } else meas_ok=1; /* the run counts as ok as soon as at least one process timed it long enough (MPI_MAX) */ MPI_Allreduce (&meas_ok, &meas_ok_recv, 1, MPI_INT, MPI_MAX, MPI_COMM_WORLD); meas_ok = meas_ok_recv; } while (!meas_ok); lat_sendrecv = (end_time-start_time) / (2 * loop_length); /* communication loop with non-blocking routines, and previous loop_length */ for ( i_loop = -1; i_loop < loop_length; i_loop++ ) { if ( i_loop == 0 ) start_time = MPI_Wtime(); #if 
(CHECK_LEVEL >= 1) /* communicate to left and right partner */ base = (i_loop + myrank + 1)&0x7f; /* = mod 128 */ sndbuf_right[0] = base; sndbuf_right[msglen-1] = base+1; sndbuf_left[0] = base+2; sndbuf_left[msglen-1] = base+3; #endif /* irecv left */ MPI_Irecv( rcvbuf_left, msglen, MPI_BYTE, left_rank, TO_RIGHT, MPI_COMM_WORLD, &requests[0] ); /* irecv right */ MPI_Irecv( rcvbuf_right, msglen, MPI_BYTE, right_rank, TO_LEFT, MPI_COMM_WORLD, &requests[1] ); /* isend right */ MPI_Isend( sndbuf_right, msglen, MPI_BYTE, right_rank, TO_RIGHT, MPI_COMM_WORLD, &requests[2] ); /* isend left */ MPI_Isend( sndbuf_left, msglen, MPI_BYTE, left_rank, TO_LEFT, MPI_COMM_WORLD, &requests[3] ); /* waitall */ MPI_Waitall( 4, requests, statuses ); #if (CHECK_LEVEL >= 1) /* check whether both transfers were done right */ /* rcvbuf_left holds the message received from left_rank */ base = (i_loop + left_rank + 1)&0x7f; /* = mod 128 */ if ( rcvbuf_left[0] != base || rcvbuf_left[msglen-1] != base+1 ) { fprintf( OutFile, "[%d]: ERROR: from left: expected %u and %u as first and last byte, but got %u and %u instead\n", myrank, base, base+1, rcvbuf_left[0], rcvbuf_left[msglen-1] ); fflush( OutFile ); } /* rcvbuf_right holds the message received from right_rank */ base = (i_loop + right_rank + 1)&0x7f; /* = mod 128 */ if ( rcvbuf_right[0] != base+2 || rcvbuf_right[msglen-1] != base + 3 ) { fprintf( OutFile, "[%d]: ERROR: from right: expected %u and %u as first and last byte, but got %u and %u instead\n", myrank, base+2, base+3, rcvbuf_right[0], rcvbuf_right[msglen-1] ); fflush( OutFile ); } #endif } end_time = MPI_Wtime(); lat_nonblocking = (end_time-start_time) / ( 2 * loop_length ); /* workaround to fix problems with MPI_Wtime granularity */ if (!lat_nonblocking) { static int complain = 0; lat_nonblocking = wtick; if (complain != loop_length) { #define MSG "In " __FILE__ ", routine bench_lat_bw, the 3rd parameter to ring_lat_bw_loop was %d; increase it.\n" fprintf( stderr, MSG, loop_length); fprintf( OutFile, MSG, loop_length); #undef MSG } complain = loop_length; } 
/* keep the faster of the two communication variants */ latencies[i_meas*(rand_pattern_count+1)+i_pat] = (lat_sendrecv < lat_nonblocking ? lat_sendrecv : lat_nonblocking); } } #if (DEBUG_LEVEL >= 5) if ((myrank == 0) || (DEBUG_LEVEL >= 6)) { fprintf( OutFile, "RANK %3d: ", myrank ); for ( i = 0; i < measurements*(rand_pattern_count+1); i++ ) fprintf( OutFile, "%e ", latencies[i] ); fprintf( OutFile, "\n" ); fflush( OutFile ); } #endif /* reduce all vectors to get maximum vector at rank 0 */ MPI_Reduce( latencies, max_latencies, measurements * (rand_pattern_count+1), MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD ); #if (DEBUG_LEVEL >= 5) fflush(stdout); MPI_Barrier(MPI_COMM_WORLD); if (myrank==0) { fprintf( OutFile, "RANK ---: " ); for ( i = 0; i < measurements*(rand_pattern_count+1); i++ ) fprintf( OutFile, "%e ", max_latencies[i] ); fprintf( OutFile, "\n" ); fflush( OutFile ); } #endif /* get minimal measurement from vector as final measurement and compute latency and bandwidth */ if ( myrank == 0 ) { /* reduce measurements to first minimal measurement */ for ( i_pat = 0; i_pat < rand_pattern_count+1; i_pat++ ) { for (i_meas = 1; i_meas < measurements; i_meas++) { /* minimal latencies over all measurements */ if (max_latencies[i_meas*(rand_pattern_count+1)+i_pat] < max_latencies[i_pat]) max_latencies[i_pat] = max_latencies[i_meas*(rand_pattern_count+1)+i_pat]; } } /* get average latency of random rings by geometric means */ avg_latency = 0; for ( i_pat = 1; i_pat < rand_pattern_count+1; i_pat++ ) avg_latency += log( max_latencies[i_pat] ); avg_latency = avg_latency / rand_pattern_count; avg_latency = exp( avg_latency ); /* compute final benchmark results */ result->msglen = msglen; result->ring_lat = max_latencies[0]; result->ring_bwidth = msglen / max_latencies[0]; result->rand_lat = avg_latency; result->rand_bwidth = msglen / avg_latency; } /* free memory */ free( ranks ); free( latencies ); free( max_latencies ); free(sndbuf_left); free(sndbuf_right); free(rcvbuf_left); free(rcvbuf_right); #if (DEBUG_LEVEL >= 2) if (myrank 
== 0) { fprintf( OutFile, "Message Size: %13d Byte\n", result->msglen ); fprintf( OutFile, "Natural Order Latency: %13.6f msec\n", result->ring_lat*1e3 ); fprintf( OutFile, "Natural Order Bandwidth: %13.6f MB/s\n", result->ring_bwidth/1e6 ); fprintf( OutFile, "Avg Random Order Latency: %13.6f msec\n", result->rand_lat*1e3 ); fprintf( OutFile, "Avg Random Order Bandwidth: %13.6f MB/s\n", result->rand_bwidth/1e6 ); fprintf( OutFile, "\n" ); fflush( OutFile ); } #endif } /* ----------------------------------------------------------------------- * Routine: bench_lat_bw() * * Task: Run cross_ping_pong_controlled and ring_lat_bw_loop * with a well chosen number of loops and measurements * to benchmark the minimal/average/maximal latency and * bandwidth of a system based on a given amount of time. * * Input: * max_time_for_latency, max_time_for_bandwidth * * Output: * msg_length_for_lat, msg_length_for_bw, * latency_min, latency_avg, latency_max, * bandwidth_min, bandwidth_avg, bandwidth_max, * number_of_pairs_for_lat, number_of_pairs_for_bw, * (min, max, avg are done over all pairs of processes for * ping pong benchmarking) * ring_lat, ring_bw, rand_lat, rand_bw * (for ring benchmarking) * * Execution Tasks: * - run cross_ping_pong_controlled for 8 and 2000000 bytes * - run ring_lat_bw_loop for 8 and 2000000 bytes * - use the results from message length 8 byte for latency * and the results from message length 2000000 for bandwidth * ----------------------------------------------------------------------- */ static void bench_lat_bw( double max_time_for_latency, /* for ping pong */ double max_time_for_bandwidth, /* for ping pong */ int *msg_length_for_lat, int *msg_length_for_bw, double *latency_min, /* */ double *latency_avg, /* ping pong measurement latency */ double *latency_max, /* */ long long *number_of_pairs_for_lat, /* ping pong */ double *bandwidth_min, /* */ double *bandwidth_avg, /* ping pong measurement bandwidth */ double *bandwidth_max, /* */ long long 
*number_of_pairs_for_bw, /* ping pong */ double *ring_lat, /* naturally ordered ring latency */ double *rand_lat, /* randomly ordered ring latency */ double *ring_bw, /* randomly ordered ring bandwidth */ double *rand_bw /* naturally ordered ring bandwidth */ ) { double l_dum_min, l_dum_avg, l_dum_max; /* dummies */ double b_dum_min, b_dum_avg, b_dum_max; /* dummies */ BenchmarkResult result_lat, result_bw; double wtick_recv; # if (DEBUG_LEVEL >= 1) int size, myrank; double wtime_total, wtime_cross_lat, wtime_cross_bw, wtime_ring_lat, wtime_ring_bw; # endif *msg_length_for_lat = 8; *msg_length_for_bw = 2000000; # if (DEBUG_LEVEL >= 1) MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); # endif /* get the granularity of MPI_Wtime, but don't trust MPI_Wtick!! */ wtick = MPI_Wtick(); # ifdef SET_WTICK wtick = SET_WTICK ; # endif if (wtick < 0) wtick = -wtick; MPI_Allreduce (&wtick, &wtick_recv, 1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD); wtick = wtick_recv; # if (DEBUG_LEVEL >= 1) if (myrank == 0) { fprintf( OutFile, "MPI_Wtime granularity.\n"); fprintf( OutFile, "Max. 
MPI_Wtick is %f sec\n", wtick); } # endif if (wtick < 1e-6) wtick = 1e-6; if (wtick > 0.01) wtick = 0.01; # if (DEBUG_LEVEL >= 1) if (myrank == 0) { fprintf( OutFile, "wtick is set to %f sec \n\n", wtick); fflush( OutFile ); } # endif /* ping pong */ /* --------- */ # if (DEBUG_LEVEL >= 1) wtime_total = - MPI_Wtime(); wtime_cross_lat = - MPI_Wtime(); # endif cross_ping_pong_controlled( max_time_for_latency, *msg_length_for_lat, 8, 5, latency_min, latency_avg, latency_max, &b_dum_min, &b_dum_avg, &b_dum_max, number_of_pairs_for_lat ); # if (DEBUG_LEVEL >= 1) wtime_cross_lat += MPI_Wtime(); wtime_cross_bw = - MPI_Wtime(); # endif cross_ping_pong_controlled( max_time_for_bandwidth, *msg_length_for_bw, 1, 2, &l_dum_min, &l_dum_avg, &l_dum_max, bandwidth_min, bandwidth_avg, bandwidth_max, number_of_pairs_for_bw ); # if (DEBUG_LEVEL >= 1) wtime_cross_bw += MPI_Wtime(); # endif /* ring */ # if (DEBUG_LEVEL >= 1) wtime_ring_lat = - MPI_Wtime(); # endif ring_lat_bw_loop( *msg_length_for_lat, 8, 5, 30, &result_lat ); *ring_lat = result_lat.ring_lat; *rand_lat = result_lat.rand_lat; # if (DEBUG_LEVEL >= 1) wtime_ring_lat += MPI_Wtime(); wtime_ring_bw = - MPI_Wtime(); # endif ring_lat_bw_loop( *msg_length_for_bw, 3, 2, 10, &result_bw ); *ring_bw = result_bw.ring_bwidth; *rand_bw = result_bw.rand_bwidth; # if (DEBUG_LEVEL >= 1) wtime_ring_bw += MPI_Wtime(); wtime_total += MPI_Wtime(); # endif # if (DEBUG_LEVEL >= 1) if (myrank==0) { fprintf( OutFile, "Execution time (wall clock) = %9.3f sec on %d processes\n", wtime_total, size); fprintf( OutFile, " - for cross ping_pong latency = %9.3f sec\n", wtime_cross_lat); fprintf( OutFile, " - for cross ping_pong bandwidth = %9.3f sec\n", wtime_cross_bw ); fprintf( OutFile, " - for ring latency = %9.3f sec\n", wtime_ring_lat); fprintf( OutFile, " - for ring bandwidth = %9.3f sec\n", wtime_ring_bw ); fflush( OutFile ); } # endif } /* ----------------------------------------------------------------------- * Routine: bench_lat_bw_print() * 
* Task: Print out the benchmark results and conditions from * bench_lat_bw. * * Input: * none * * Output: * none * * Execution Tasks: * - run bench_lat_bw * - print out the five most important values: * - max ping pong latency * - min ping pong bandwidth * - randomly ordered ring latency * - naturally ordered ring bandwidth * - randomly ordered ring bandwidth * - print all benchmark results on debug level 1 * - print benchmark conditions: * - number of processors * - message lengths * - number of ping pong pairs * ----------------------------------------------------------------------- */ static void bench_lat_bw_print(double *MaxPingPongLatency, double *RandomlyOrderedRingLatency, double *MinPingPongBandwidth, double *NaturallyOrderedRingBandwidth, double *RandomlyOrderedRingBandwidth, double *MinPingPongLatency, double *AvgPingPongLatency, double *MaxPingPongBandwidth, double *AvgPingPongBandwidth, double *NaturallyOrderedRingLatency) { int msg_length_for_lat; int msg_length_for_bw; double ring_lat, rand_lat; double ring_bw, rand_bw; int size, myrank; double max_time_for_latency; double max_time_for_bandwidth; double latency_min; double latency_avg; double latency_max; double bandwidth_min; double bandwidth_avg; double bandwidth_max; long long number_of_pairs_for_lat, number_of_pairs_for_bw; MPI_Comm_size(MPI_COMM_WORLD, &size); MPI_Comm_rank(MPI_COMM_WORLD, &myrank); # if (DEBUG_LEVEL >= 1) if (myrank == 0 ) { fprintf ( OutFile, "\n------------------------------------------------------------------\n"); fprintf ( OutFile, "Latency-Bandwidth-Benchmark R1.5.1 (c) HLRS, University of Stuttgart\n"); fprintf ( OutFile, "Written by Rolf Rabenseifner, Gerrit Schulz, and Michael Speck, Germany\n\n"); fprintf ( OutFile, "Details - level %d\n", DEBUG_LEVEL); fprintf ( OutFile, "-----------------\n\n"); fflush( OutFile ); } # endif /* The following timings are used for the cross ping pong. 
Additionally, about 300 seconds (on a 100 MB/s) are necessary for benchmarking the ring patterns. */ max_time_for_latency = 10.0 /*sec*/; max_time_for_bandwidth = 30.0 /*sec*/; bench_lat_bw( max_time_for_latency, max_time_for_bandwidth, &msg_length_for_lat, &msg_length_for_bw, &latency_min, &latency_avg, &latency_max, &number_of_pairs_for_lat, &bandwidth_min, &bandwidth_avg, &bandwidth_max, &number_of_pairs_for_bw, &ring_lat, &rand_lat, &ring_bw, &rand_bw ); if (myrank == 0 ) { fprintf ( OutFile, "\n------------------------------------------------------------------\n"); fprintf ( OutFile, "Latency-Bandwidth-Benchmark R1.5.1 (c) HLRS, University of Stuttgart\n"); fprintf ( OutFile, "Written by Rolf Rabenseifner, Gerrit Schulz, and Michael Speck, Germany\n\n"); fprintf( OutFile, "Major Benchmark results:\n" ); fprintf( OutFile, "------------------------\n\n" ); fprintf( OutFile, "Max Ping Pong Latency: %13.6f msecs\n", latency_max*1e3 ); fprintf( OutFile, "Randomly Ordered Ring Latency: %13.6f msecs\n", rand_lat*1e3 ); fprintf( OutFile, "Min Ping Pong Bandwidth: %13.6f MB/s\n", bandwidth_min/1e6 ); fprintf( OutFile, "Naturally Ordered Ring Bandwidth: %13.6f MB/s\n", ring_bw/1e6 ); fprintf( OutFile, "Randomly Ordered Ring Bandwidth: %13.6f MB/s\n", rand_bw/1e6 ); *MaxPingPongLatency = latency_max * 1e6; /* usec */ *RandomlyOrderedRingLatency = rand_lat * 1e6; /* usec */ *MinPingPongBandwidth = bandwidth_min * 1e-9; /* GB/s */ *NaturallyOrderedRingBandwidth = ring_bw * 1e-9; /* GB/s */ *RandomlyOrderedRingBandwidth = rand_bw * 1e-9; /* GB/s */ *MinPingPongLatency = latency_min * 1e6; /* usec */ *AvgPingPongLatency = latency_avg * 1e6; /* usec */ *MaxPingPongBandwidth = bandwidth_max * 1e-9; /* GB/s */ *AvgPingPongBandwidth = bandwidth_avg * 1e-9; /* GB/s */ *NaturallyOrderedRingLatency = ring_lat * 1e6; /* usec */ fprintf ( OutFile, "\n------------------------------------------------------------------\n"); fprintf( OutFile, "\nDetailed benchmark results:\n" ); fprintf( 
OutFile, "Ping Pong:\n" ); fprintf ( OutFile, "Latency min / avg / max: %10.6f / %10.6f / %10.6f msecs\n", latency_min*1e3, latency_avg*1e3, latency_max*1e3); fprintf ( OutFile, "Bandwidth min / avg / max: %10.3f / %10.3f / %10.3f MByte/s\n", bandwidth_min/1e6, bandwidth_avg/1e6, bandwidth_max/1e6); fprintf( OutFile, "Ring:\n" ); fprintf( OutFile, "On naturally ordered ring: latency= %13.6f msec, bandwidth= %13.6f MB/s\n", ring_lat*1e3, ring_bw/1e6); fprintf( OutFile, "On randomly ordered ring: latency= %13.6f msec, bandwidth= %13.6f MB/s\n", rand_lat*1e3, rand_bw/1e6); fprintf ( OutFile, "\n------------------------------------------------------------------\n"); fprintf( OutFile, "\nBenchmark conditions:\n" ); fprintf( OutFile, " The latency measurements were done with %8d bytes\n", msg_length_for_lat); fprintf( OutFile, " The bandwidth measurements were done with %8d bytes\n", msg_length_for_bw); fprintf( OutFile, " The ring communication was done in both directions on %1d processes\n", size); fprintf( OutFile, " The Ping Pong measurements were done on \n"); fprintf( OutFile, " - %10.0f pairs of processes for latency benchmarking, and \n", 1.0*number_of_pairs_for_lat); fprintf( OutFile, " - %10.0f pairs of processes for bandwidth benchmarking, \n", 1.0*number_of_pairs_for_bw); fprintf( OutFile, " out of %d*(%d-1) = %10.0f possible combinations on %1d processes.\n", size, size, 1.0*size*(size-1), size); fprintf( OutFile, " (1 MB/s = 10**6 byte/sec)\n" ); fprintf( OutFile, "\n------------------------------------------------------------------\n"); fflush( OutFile ); } } void main_bench_lat_bw(HPCC_Params *params) { int myRank, commSize; MPI_Comm comm = MPI_COMM_WORLD; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); if (0 == myRank) { OutFile = fopen( params->outFname, "a" ); if (! 
OutFile) { OutFile = stderr; fprintf( OutFile, "Cannot open output file.\n" ); } } else OutFile = stderr; if (commSize > 1) bench_lat_bw_print( ¶ms->MaxPingPongLatency, ¶ms->RandomlyOrderedRingLatency, ¶ms->MinPingPongBandwidth, ¶ms->NaturallyOrderedRingBandwidth, ¶ms->RandomlyOrderedRingBandwidth, ¶ms->MinPingPongLatency, ¶ms->AvgPingPongLatency, ¶ms->MaxPingPongBandwidth, ¶ms->AvgPingPongBandwidth, ¶ms->NaturallyOrderedRingLatency ); MPI_Bcast( ¶ms->MaxPingPongLatency, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->RandomlyOrderedRingLatency, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->MinPingPongBandwidth, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->NaturallyOrderedRingBandwidth, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->RandomlyOrderedRingBandwidth, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->MinPingPongLatency, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->AvgPingPongLatency, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->MaxPingPongBandwidth, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->AvgPingPongBandwidth, 1, MPI_DOUBLE, 0, comm ); MPI_Bcast( ¶ms->NaturallyOrderedRingLatency, 1, MPI_DOUBLE, 0, comm ); fflush( OutFile ); if (stderr != OutFile) fclose(OutFile); } hpcc-1.4.1/src/extfinalize.c0000644000000000000000000000077611353467335012622 00000000000000/* This routine is called right before MPI_Finalize() and allows finalization of external software and hardware components. It can be replaced at the time of installation. A sample implemenation may finialize proprietary computational and communication libraries. The parameter "extdata" points to an object of size of a pointer. "extdata" comes from HPCC_external_init(). Upon success, the function should return 0. */ int HPCC_external_finalize(int argc, char *argv[], void *extdata) { return 0; } hpcc-1.4.1/src/extinit.c0000644000000000000000000000113211353467335011747 00000000000000/* This routine is called right after MPI_Init() and allows initialization of external software and hardware components. 
It can be replaced at the time of installation. A sample implemenation may initialize proprietary computational and communication libraries. The parameter "extdata" points to an object of size of a pointer. The function may choose to store a pointer to its internal data and it will be passed to the finalization routine HPCC_external_finalize(). Upon success, the function should return 0. */ int HPCC_external_init(int argc, char *argv[], void *extdata) { return 0; } hpcc-1.4.1/src/hpcc.c0000644000000000000000000002603111353467335011205 00000000000000/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */ /* hpcc.c */ #include #include int main(int argc, char *argv[]) { int myRank, commSize; char *outFname; FILE *outputFile; HPCC_Params params; time_t currentTime; void *extdata; MPI_Init( &argc, &argv ); if (HPCC_external_init( argc, argv, &extdata )) goto hpcc_end; if (HPCC_Init( ¶ms )) goto hpcc_end; MPI_Comm_size( MPI_COMM_WORLD, &commSize ); MPI_Comm_rank( MPI_COMM_WORLD, &myRank ); outFname = params.outFname; /* -------------------------------------------------- */ /* MPI RandomAccess */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of MPIRandomAccess section.\n" ); END_IO( myRank, outputFile ); if (params.RunMPIRandomAccess) HPCC_MPIRandomAccess( ¶ms ); time( ¤tTime ); BEGIN_IO(myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of MPIRandomAccess section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* StarRandomAccess */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of StarRandomAccess section.\n" ); END_IO( myRank, outputFile ); if (params.RunStarRandomAccess) 
HPCC_StarRandomAccess( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of StarRandomAccess section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* SingleRandomAccess */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of SingleRandomAccess section.\n" ); END_IO( myRank, outputFile ); if (params.RunSingleRandomAccess) HPCC_SingleRandomAccess( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of SingleRandomAccess section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* MPI RandomAccess LCG */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of MPIRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); if (params.RunMPIRandomAccess_LCG) HPCC_MPIRandomAccess_LCG( ¶ms ); time( ¤tTime ); BEGIN_IO(myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of MPIRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* StarRandomAccess LCG */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of StarRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); if (params.RunStarRandomAccess_LCG) HPCC_StarRandomAccess_LCG( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); 
fprintf( outputFile, "End of StarRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* SingleRandomAccess LCG */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of SingleRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); if (params.RunSingleRandomAccess_LCG) HPCC_SingleRandomAccess_LCG( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of SingleRandomAccess_LCG section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* PTRANS */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of PTRANS section.\n" ); END_IO( myRank, outputFile ); if (params.RunPTRANS) PTRANS( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of PTRANS section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* StarDGEMM */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of StarDGEMM section.\n" ); END_IO( myRank, outputFile ); if (params.RunStarDGEMM) HPCC_StarDGEMM( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of StarDGEMM section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* SingleDGEMM */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( 
myRank, outFname, outputFile); fprintf( outputFile, "Begin of SingleDGEMM section.\n" ); END_IO( myRank, outputFile ); if (params.RunSingleDGEMM) HPCC_SingleDGEMM( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of SingleDGEMM section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* StarSTREAM */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of StarSTREAM section.\n" ); END_IO( myRank, outputFile ); if (params.RunStarStream) HPCC_StarStream( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of StarSTREAM section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* SingleSTREAM */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of SingleSTREAM section.\n" ); END_IO( myRank, outputFile ); if (params.RunSingleStream) HPCC_SingleStream( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of SingleSTREAM section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* MPIFFT */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of MPIFFT section.\n" ); END_IO( myRank, outputFile ); if (params.RunMPIFFT) HPCC_MPIFFT( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is 
%s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of MPIFFT section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* StarFFT */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of StarFFT section.\n" ); END_IO( myRank, outputFile ); if (params.RunStarFFT) HPCC_StarFFT( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of StarFFT section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* SingleFFT */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of SingleFFT section.\n" ); END_IO( myRank, outputFile ); if (params.RunSingleFFT) HPCC_SingleFFT( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of SingleFFT section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* Latency/Bandwidth */ /* -------------------------------------------------- */ MPI_Barrier( MPI_COMM_WORLD ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile, "Begin of LatencyBandwidth section.\n" ); END_IO( myRank, outputFile ); if (params.RunLatencyBandwidth) main_bench_lat_bw( ¶ms ); time( ¤tTime ); BEGIN_IO( myRank, outFname, outputFile); fprintf( outputFile,"Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "End of LatencyBandwidth section.\n" ); END_IO( myRank, outputFile ); /* -------------------------------------------------- */ /* HPL */ /* -------------------------------------------------- */ BEGIN_IO( myRank, outFname, outputFile); fprintf( 
/*
 * Parse up to n decimal integers from the NUL-terminated string buf.
 *
 * buf: text to parse; the first integer must be the first token (leading
 *      white space is allowed), later integers may be separated by any
 *      characters that cannot start an integer (blanks, commas, words, ...)
 * n:   maximum number of integers to read
 * val: receives the integers, val[0] .. val[count-1]
 *
 * Returns the number of integers stored in val (0 .. n).
 *
 * Every integer is consumed exactly as sscanf() parsed it ("%n"), so a value
 * preceded by white space or carrying a sign is not read a second time.
 */
static int
ReadInts(char *buf, int n, int *val) {
  int count, pos, used;

  count = 0;
  pos = 0;
  while (count < n) {
    used = 0;
    if (sscanf( buf + pos, "%d%n", val + count, &used ) != 1)
      break;
    count++;
    pos += used;

    /* skip the separator: stop at a digit, or at a sign directly followed by
       a digit; <ctype.h> functions need an unsigned char value */
    while (buf[pos] && ! isdigit( (unsigned char)buf[pos] ) &&
           ! (('-' == buf[pos] || '+' == buf[pos]) && isdigit( (unsigned char)buf[pos + 1] )))
      pos++;

    if (! buf[pos])
      break; /* nothing left to parse */
  }

  return count;
}
/*
 * Find the entry of largest absolute value in an integer vector.
 *
 * n:    upper bound (exclusive) on the offsets that are examined
 * x:    the vector
 * incx: step between examined offsets (offsets 0, incx, 2*incx, ... < n)
 *
 * Returns the offset into x of the first entry with the largest magnitude.
 * Returns 0 without reading x when n <= 0 or incx <= 0.
 *
 * Magnitudes are compared as unsigned values so that INT_MIN is handled
 * without signed overflow.
 */
static int
iiamax(int n, int *x, int incx) {
  int i, idx = 0;
  unsigned int v, mx = 0;

  if (n <= 0 || incx <= 0)
    return 0;

  for (i = 0; i < n; i += incx) {
    v = (x[i] < 0 ? 0u - (unsigned int)x[i] : (unsigned int)x[i]);
    if (mx < v) {mx = v; idx = i;}
  }

  return idx;
}

/*
 * Copy n integers from src (stride sinc) to dst (stride dinc).
 * Nothing is copied when n <= 0.
 */
static void
icopy(int n, int *src, int sinc, int *dst, int dinc) {
  int i;

  for (i = 0; i < n; i++) {
    *dst = *src;
    dst += dinc;
    src += sinc;
  }
}
outputFile ); fprintf( outputFile, "Error in line %d of the input file.\n", line ); END_IO( myRank, outputFile ); } n = Mmin( n, HPL_MAX_PARAM ); line++; fgets( buf, nbuf, f ); ReadInts( buf, n, params->PTRANSnbval ); icopy( params->nbs, params->nbval, 1, params->PTRANSnbval + n, 1 ); params->PTRANSnbs = n + params->nbs; ioErr = 0; ioEnd: if (f) fclose( f ); } MPI_Bcast( &ioErr, 1, MPI_INT, 0, comm ); if (ioErr) { /* copy matrix sizes from HPL, divide by 2 so both PTRANS matrices (plus "work" arrays) occupy as much as HPL's one */ for (i = 0; i < params->ns; i++) params->PTRANSnval[i] = params->nval[i] / 2; params->PTRANSns = params->ns; icopy( params->nbs, params->nbval, 1, params->PTRANSnbval, 1 ); params->PTRANSnbs = params->nbs; } /* broadcast what's been read on node 0 */ MPI_Bcast( ¶ms->PTRANSns, 1, MPI_INT, 0, comm ); if (params->PTRANSns > 0) MPI_Bcast( ¶ms->PTRANSnval, params->PTRANSns, MPI_INT, 0, comm ); MPI_Bcast( ¶ms->PTRANSnbs, 1, MPI_INT, 0, comm ); if (params->PTRANSnbs > 0) MPI_Bcast( ¶ms->PTRANSnbval, params->PTRANSnbs, MPI_INT, 0, comm ); /* copy what HPL has */ params->PTRANSnpqs = params->npqs; icopy( params->npqs, params->qval, 1, params->PTRANSqval, 1 ); icopy( params->npqs, params->pval, 1, params->PTRANSpval, 1 ); return ioErr; } static int ErrorReduce(FILE *f, char *str, int eCode, MPI_Comm comm) { int rCode; if (eCode) eCode = 1; /* make sure error is indicated with 1 */ MPI_Allreduce( &eCode, &rCode, 1, MPI_INT, MPI_SUM, comm ); if (rCode) { if (f) fprintf( f, str ); return -1; } return 0; } int HPCC_Init(HPCC_Params *params) { int myRank, commSize; int i, nMax, nbMax, procCur, procMax, procMin, errCode; double totalMem; char inFname[12] = "hpccinf.txt", outFname[13] = "hpccoutf.txt"; FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; time_t currentTime; char hostname[MPI_MAX_PROCESSOR_NAME + 1]; int hostnameLen; size_t hpl_mem, ptrans_mem; long dMemSize; outputFile = NULL; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); 
strcpy( params->inFname, inFname ); strcpy( params->outFname, outFname ); if (0 == myRank) outputFile = fopen( params->outFname, "a" ); errCode = 0; if (sizeof(u64Int) < 8 || sizeof(s64Int) < 8) errCode = 1; if (ErrorReduce( outputFile, "No 64-bit integer type available.", errCode, comm )) return -1; i = MPI_Get_processor_name( hostname, &hostnameLen ); if (i) hostname[0] = 0; else hostname[Mmax(hostnameLen, MPI_MAX_PROCESSOR_NAME)] = 0; time( ¤tTime ); BEGIN_IO( myRank, params->outFname, outputFile ); fprintf( outputFile, "########################################################################\n" ); fprintf( outputFile, "This is the DARPA/DOE HPC Challenge Benchmark version %d.%d.%d October 2003\n", HPCC_VERSION_MAJOR, HPCC_VERSION_MINOR, HPCC_VERSION_MICRO ); fprintf( outputFile, "Produced by Jack Dongarra and Piotr Luszczek\n" ); fprintf( outputFile, "Innovative Computing Laboratory\n" ); fprintf( outputFile, "University of Tennessee Knoxville and Oak Ridge National Laboratory\n\n" ); fprintf( outputFile, "See the source files for authors of specific codes.\n" ); fprintf( outputFile, "Compiled on %s at %s\n", __DATE__ , __TIME__ ); fprintf( outputFile, "Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "Hostname: '%s'\n", hostname ); fprintf( outputFile, "########################################################################\n" ); END_IO( myRank, outputFile ); params->Failure = 0; HPCC_InitHPL( params ); /* HPL calls exit() if there is a problem */ HPCC_InputFileInit( params ); params->RunHPL = 0; params->RunStarDGEMM = 0; params->RunSingleDGEMM = 0; params->RunPTRANS = 0; params->RunStarStream = 0; params->RunSingleStream = 0; params->RunMPIRandomAccess_LCG = 0; params->RunStarRandomAccess_LCG = 0; params->RunSingleRandomAccess_LCG = 0; params->RunMPIRandomAccess = 0; params->RunStarRandomAccess = 0; params->RunSingleRandomAccess = 0; params->RunLatencyBandwidth = 0; params->RunMPIFFT = 0; params->RunHPL = 
params->RunStarDGEMM = params->RunSingleDGEMM = params->RunPTRANS = params->RunStarStream = params->RunSingleStream = params->RunMPIRandomAccess_LCG = params->RunStarRandomAccess_LCG = params->RunSingleRandomAccess_LCG = params->RunMPIRandomAccess = params->RunStarRandomAccess = params->RunSingleRandomAccess = params->RunMPIFFT = params->RunStarFFT = params->RunSingleFFT = params->RunLatencyBandwidth = 1; params->MPIRandomAccess_LCG_GUPs = params->MPIRandomAccess_GUPs = params->StarGUPs = params->SingleGUPs = params->StarDGEMMGflops = params->SingleDGEMMGflops = -1.0; params->StarStreamCopyGBs = params->StarStreamScaleGBs = params->StarStreamAddGBs = params->StarStreamTriadGBs = params->SingleStreamCopyGBs = params->SingleStreamScaleGBs = params->SingleStreamAddGBs = params->SingleStreamTriadGBs = params->SingleFFTGflops = params->StarFFTGflops = params->MPIFFTGflops = params->MPIFFT_maxErr = params->MaxPingPongLatency = params-> RandomlyOrderedRingLatency = params-> MinPingPongBandwidth = params->NaturallyOrderedRingBandwidth = params->RandomlyOrderedRingBandwidth = params->MinPingPongLatency = params->AvgPingPongLatency = params->MaxPingPongBandwidth = params->AvgPingPongBandwidth = params->NaturallyOrderedRingLatency = -1.0; params->HPLrdata.Gflops = -1000.0; params->HPLrdata.time = params->HPLrdata.eps = params->HPLrdata.RnormI = params->HPLrdata.Anorm1 = params->HPLrdata.AnormI = params->HPLrdata.Xnorm1 = params->HPLrdata.XnormI = -1.0; params->HPLrdata.N = params->HPLrdata.NB = params->HPLrdata.nprow = params->HPLrdata.npcol = params->HPLrdata.depth = params->HPLrdata.nbdiv = params->HPLrdata.nbmin = -1; params->HPLrdata.cpfact = params->HPLrdata.crfact = params->HPLrdata.ctop = params->HPLrdata.order = '-'; params->PTRANSrdata.GBs = params->PTRANSrdata.time = params->PTRANSrdata.residual = -1.0; params->PTRANSrdata.n = params->PTRANSrdata.nb = params->PTRANSrdata.nprow = params->PTRANSrdata.npcol = -1; params->MPIRandomAccess_LCG_ErrorsFraction = 
params->MPIRandomAccess_ErrorsFraction = params->MPIRandomAccess_LCG_time = params->MPIRandomAccess_LCG_CheckTime = params->MPIRandomAccess_time = params->MPIRandomAccess_CheckTime = params->MPIRandomAccess_LCG_TimeBound = params->MPIRandomAccess_TimeBound = -1.0; params->DGEMM_N = params->FFT_N = params->StreamVectorSize = params->MPIRandomAccess_LCG_Algorithm = params->MPIRandomAccess_Algorithm = params->MPIFFT_Procs = -1; params->StreamThreads = 1; params->FFTEnblk = params->FFTEnp = params->FFTEl2size = -1; params->MPIFFT_N = params->RandomAccess_LCG_N = params->MPIRandomAccess_LCG_N = params->MPIRandomAccess_LCG_Errors = params->RandomAccess_N = params->MPIRandomAccess_N = params->MPIRandomAccess_Errors = params->MPIRandomAccess_LCG_ExeUpdates = params->MPIRandomAccess_ExeUpdates = (s64Int)(-1); procMax = procMin = params->pval[0] * params->qval[0]; for (i = 1; i < params->npqs; ++i) { procCur = params->pval[i] * params->qval[i]; if (procMax < procCur) procMax = procCur; if (procMin > procCur) procMin = procCur; } params->HPLMaxProc = procMax; params->HPLMinProc = procMin; nMax = params->nval[iiamax( params->ns, params->nval, 1 )]; /* totalMem = (nMax*nMax) * sizeof(double) */ totalMem = nMax; totalMem *= nMax; totalMem *= sizeof(double); params->HPLMaxProcMem = totalMem / procMin; for (i = 0; i < MPIFFT_TIMING_COUNT; i++) params->MPIFFTtimingsForward[i] = 0.0; i = iiamax( params->PTRANSnbs, params->PTRANSnbval, 1 ); nbMax = params->PTRANSnbval[i]; #ifdef HPCC_MEMALLCTR MaxMem( commSize, 0, 0, params->PTRANSns, params->PTRANSnval, params->PTRANSnval, params->PTRANSnbs, params->PTRANSnbval, params->PTRANSnbval, params->PTRANSnpqs, params->PTRANSpval, params->PTRANSqval, &dMemSize ); ptrans_mem = dMemSize * sizeof(double) + 3 * commSize * sizeof(int); hpl_mem = params->HPLMaxProcMem + (nMax + nbMax) * sizeof(double) * nbMax; HPCC_alloc_init( Mmax( ptrans_mem, hpl_mem ) ); #endif return 0; } int HPCC_Finalize(HPCC_Params *params) { int myRank, commSize; int i; 
FILE *outputFile; MPI_Comm comm = MPI_COMM_WORLD; time_t currentTime; #ifdef HPCC_MEMALLCTR HPCC_alloc_finalize(); #endif time( ¤tTime ); MPI_Comm_rank( comm, &myRank ); MPI_Comm_size( comm, &commSize ); BEGIN_IO(myRank, params->outFname, outputFile); fprintf( outputFile, "Begin of Summary section.\n" ); fprintf( outputFile, "VersionMajor=%d\n", HPCC_VERSION_MAJOR ); fprintf( outputFile, "VersionMinor=%d\n", HPCC_VERSION_MINOR ); fprintf( outputFile, "VersionMicro=%d\n", HPCC_VERSION_MICRO ); fprintf( outputFile, "VersionRelease=%c\n", HPCC_VERSION_RELEASE ); fprintf( outputFile, "LANG=%s\n", "C" ); fprintf( outputFile, "Success=%d\n", params->Failure ? 0 : 1 ); fprintf( outputFile, "sizeof_char=%d\n", (int)sizeof(char) ); fprintf( outputFile, "sizeof_short=%d\n", (int)sizeof(short) ); fprintf( outputFile, "sizeof_int=%d\n", (int)sizeof(int) ); fprintf( outputFile, "sizeof_long=%d\n", (int)sizeof(long) ); fprintf( outputFile, "sizeof_void_ptr=%d\n", (int)sizeof(void*) ); fprintf( outputFile, "sizeof_size_t=%d\n", (int)sizeof(size_t) ); fprintf( outputFile, "sizeof_float=%d\n", (int)sizeof(float) ); fprintf( outputFile, "sizeof_double=%d\n", (int)sizeof(double) ); fprintf( outputFile, "sizeof_s64Int=%d\n", (int)sizeof(s64Int) ); fprintf( outputFile, "sizeof_u64Int=%d\n", (int)sizeof(u64Int) ); fprintf( outputFile, "sizeof_struct_double_double=%d\n", (int)sizeof(struct{double HPCC_r,HPCC_i;}) ); fprintf( outputFile, "CommWorldProcs=%d\n", commSize ); fprintf( outputFile, "MPI_Wtick=%e\n", MPI_Wtick() ); fprintf( outputFile, "HPL_Tflops=%g\n", params->HPLrdata.Gflops * 1e-3 ); fprintf( outputFile, "HPL_time=%g\n", params->HPLrdata.time ); fprintf( outputFile, "HPL_eps=%g\n", params->HPLrdata.eps ); fprintf( outputFile, "HPL_RnormI=%g\n", params->HPLrdata.RnormI ); fprintf( outputFile, "HPL_Anorm1=%g\n", params->HPLrdata.Anorm1 ); fprintf( outputFile, "HPL_AnormI=%g\n", params->HPLrdata.AnormI ); fprintf( outputFile, "HPL_Xnorm1=%g\n", params->HPLrdata.Xnorm1 ); 
fprintf( outputFile, "HPL_XnormI=%g\n", params->HPLrdata.XnormI ); fprintf( outputFile, "HPL_BnormI=%g\n", params->HPLrdata.BnormI ); fprintf( outputFile, "HPL_N=%d\n", params->HPLrdata.N ); fprintf( outputFile, "HPL_NB=%d\n", params->HPLrdata.NB ); fprintf( outputFile, "HPL_nprow=%d\n", params->HPLrdata.nprow ); fprintf( outputFile, "HPL_npcol=%d\n", params->HPLrdata.npcol ); fprintf( outputFile, "HPL_depth=%d\n", params->HPLrdata.depth ); fprintf( outputFile, "HPL_nbdiv=%d\n", params->HPLrdata.nbdiv ); fprintf( outputFile, "HPL_nbmin=%d\n", params->HPLrdata.nbmin ); fprintf( outputFile, "HPL_cpfact=%c\n", params->HPLrdata.cpfact ); fprintf( outputFile, "HPL_crfact=%c\n", params->HPLrdata.crfact ); fprintf( outputFile, "HPL_ctop=%c\n", params->HPLrdata.ctop ); fprintf( outputFile, "HPL_order=%c\n", params->HPLrdata.order ); fprintf( outputFile, "HPL_dMACH_EPS=%e\n", HPL_dlamch( HPL_MACH_EPS ) ); fprintf( outputFile, "HPL_dMACH_SFMIN=%e\n",HPL_dlamch( HPL_MACH_SFMIN ) ); fprintf( outputFile, "HPL_dMACH_BASE=%e\n", HPL_dlamch( HPL_MACH_BASE ) ); fprintf( outputFile, "HPL_dMACH_PREC=%e\n", HPL_dlamch( HPL_MACH_PREC ) ); fprintf( outputFile, "HPL_dMACH_MLEN=%e\n", HPL_dlamch( HPL_MACH_MLEN ) ); fprintf( outputFile, "HPL_dMACH_RND=%e\n", HPL_dlamch( HPL_MACH_RND ) ); fprintf( outputFile, "HPL_dMACH_EMIN=%e\n", HPL_dlamch( HPL_MACH_EMIN ) ); fprintf( outputFile, "HPL_dMACH_RMIN=%e\n", HPL_dlamch( HPL_MACH_RMIN ) ); fprintf( outputFile, "HPL_dMACH_EMAX=%e\n", HPL_dlamch( HPL_MACH_EMAX ) ); fprintf( outputFile, "HPL_dMACH_RMAX=%e\n", HPL_dlamch( HPL_MACH_RMAX ) ); fprintf( outputFile, "HPL_sMACH_EPS=%e\n", (double)HPL_slamch( HPL_MACH_EPS ) ); fprintf( outputFile, "HPL_sMACH_SFMIN=%e\n",(double)HPL_slamch( HPL_MACH_SFMIN ) ); fprintf( outputFile, "HPL_sMACH_BASE=%e\n", (double)HPL_slamch( HPL_MACH_BASE ) ); fprintf( outputFile, "HPL_sMACH_PREC=%e\n", (double)HPL_slamch( HPL_MACH_PREC ) ); fprintf( outputFile, "HPL_sMACH_MLEN=%e\n", (double)HPL_slamch( HPL_MACH_MLEN ) ); 
fprintf( outputFile, "HPL_sMACH_RND=%e\n", (double)HPL_slamch( HPL_MACH_RND ) ); fprintf( outputFile, "HPL_sMACH_EMIN=%e\n", (double)HPL_slamch( HPL_MACH_EMIN ) ); fprintf( outputFile, "HPL_sMACH_RMIN=%e\n", (double)HPL_slamch( HPL_MACH_RMIN ) ); fprintf( outputFile, "HPL_sMACH_EMAX=%e\n", (double)HPL_slamch( HPL_MACH_EMAX ) ); fprintf( outputFile, "HPL_sMACH_RMAX=%e\n", (double)HPL_slamch( HPL_MACH_RMAX ) ); fprintf( outputFile, "dweps=%e\n", HPCC_dweps() ); fprintf( outputFile, "sweps=%e\n", (double)HPCC_sweps() ); fprintf( outputFile, "HPLMaxProcs=%d\n", params->HPLMaxProc ); fprintf( outputFile, "HPLMinProcs=%d\n", params->HPLMinProc ); fprintf( outputFile, "DGEMM_N=%d\n", params->DGEMM_N ); fprintf( outputFile, "StarDGEMM_Gflops=%g\n", params->StarDGEMMGflops ); fprintf( outputFile, "SingleDGEMM_Gflops=%g\n", params->SingleDGEMMGflops ); fprintf( outputFile, "PTRANS_GBs=%g\n", params->PTRANSrdata.GBs ); fprintf( outputFile, "PTRANS_time=%g\n", params->PTRANSrdata.time ); fprintf( outputFile, "PTRANS_residual=%g\n", params->PTRANSrdata.residual ); fprintf( outputFile, "PTRANS_n=%d\n", params->PTRANSrdata.n ); fprintf( outputFile, "PTRANS_nb=%d\n", params->PTRANSrdata.nb ); fprintf( outputFile, "PTRANS_nprow=%d\n", params->PTRANSrdata.nprow ); fprintf( outputFile, "PTRANS_npcol=%d\n", params->PTRANSrdata.npcol ); fprintf( outputFile, "MPIRandomAccess_LCG_N=" FSTR64 "\n", params->MPIRandomAccess_LCG_N ); fprintf( outputFile, "MPIRandomAccess_LCG_time=%g\n", params->MPIRandomAccess_LCG_time ); fprintf( outputFile, "MPIRandomAccess_LCG_CheckTime=%g\n", params->MPIRandomAccess_LCG_CheckTime ); fprintf( outputFile, "MPIRandomAccess_LCG_Errors=" FSTR64 "\n", params->MPIRandomAccess_LCG_Errors ); fprintf( outputFile, "MPIRandomAccess_LCG_ErrorsFraction=%g\n", params->MPIRandomAccess_LCG_ErrorsFraction ); fprintf( outputFile, "MPIRandomAccess_LCG_ExeUpdates=" FSTR64 "\n", params->MPIRandomAccess_LCG_ExeUpdates ); fprintf( outputFile, "MPIRandomAccess_LCG_GUPs=%g\n", 
params->MPIRandomAccess_LCG_GUPs ); fprintf( outputFile, "MPIRandomAccess_LCG_TimeBound=%g\n", params->MPIRandomAccess_LCG_TimeBound ); fprintf( outputFile, "MPIRandomAccess_LCG_Algorithm=%d\n", params->MPIRandomAccess_LCG_Algorithm ); fprintf( outputFile, "MPIRandomAccess_N=" FSTR64 "\n", params->MPIRandomAccess_N ); fprintf( outputFile, "MPIRandomAccess_time=%g\n", params->MPIRandomAccess_time ); fprintf( outputFile, "MPIRandomAccess_CheckTime=%g\n", params->MPIRandomAccess_CheckTime ); fprintf( outputFile, "MPIRandomAccess_Errors=" FSTR64 "\n", params->MPIRandomAccess_Errors ); fprintf( outputFile, "MPIRandomAccess_ErrorsFraction=%g\n", params->MPIRandomAccess_ErrorsFraction ); fprintf( outputFile, "MPIRandomAccess_ExeUpdates=" FSTR64 "\n", params->MPIRandomAccess_ExeUpdates ); fprintf( outputFile, "MPIRandomAccess_GUPs=%g\n", params->MPIRandomAccess_GUPs ); fprintf( outputFile, "MPIRandomAccess_TimeBound=%g\n", params->MPIRandomAccess_TimeBound ); fprintf( outputFile, "MPIRandomAccess_Algorithm=%d\n", params->MPIRandomAccess_Algorithm ); fprintf( outputFile, "RandomAccess_LCG_N=" FSTR64 "\n", params->RandomAccess_LCG_N ); fprintf( outputFile, "StarRandomAccess_LCG_GUPs=%g\n", params->Star_LCG_GUPs ); fprintf( outputFile, "SingleRandomAccess_LCG_GUPs=%g\n", params->Single_LCG_GUPs ); fprintf( outputFile, "RandomAccess_N=" FSTR64 "\n", params->RandomAccess_N ); fprintf( outputFile, "StarRandomAccess_GUPs=%g\n", params->StarGUPs ); fprintf( outputFile, "SingleRandomAccess_GUPs=%g\n", params->SingleGUPs ); fprintf( outputFile, "STREAM_VectorSize=%d\n", params->StreamVectorSize ); fprintf( outputFile, "STREAM_Threads=%d\n", params->StreamThreads ); fprintf( outputFile, "StarSTREAM_Copy=%g\n", params->StarStreamCopyGBs ); fprintf( outputFile, "StarSTREAM_Scale=%g\n", params->StarStreamScaleGBs ); fprintf( outputFile, "StarSTREAM_Add=%g\n", params->StarStreamAddGBs ); fprintf( outputFile, "StarSTREAM_Triad=%g\n", params->StarStreamTriadGBs ); fprintf( outputFile, 
"SingleSTREAM_Copy=%g\n", params->SingleStreamCopyGBs ); fprintf( outputFile, "SingleSTREAM_Scale=%g\n", params->SingleStreamScaleGBs ); fprintf( outputFile, "SingleSTREAM_Add=%g\n", params->SingleStreamAddGBs ); fprintf( outputFile, "SingleSTREAM_Triad=%g\n", params->SingleStreamTriadGBs ); fprintf( outputFile, "FFT_N=%d\n", params->FFT_N ); fprintf( outputFile, "StarFFT_Gflops=%g\n", params->StarFFTGflops ); fprintf( outputFile, "SingleFFT_Gflops=%g\n", params->SingleFFTGflops ); fprintf( outputFile, "MPIFFT_N=" FSTR64 "\n", params->MPIFFT_N ); fprintf( outputFile, "MPIFFT_Gflops=%g\n", params->MPIFFTGflops ); fprintf( outputFile, "MPIFFT_maxErr=%g\n", params->MPIFFT_maxErr ); fprintf( outputFile, "MPIFFT_Procs=%d\n", params->MPIFFT_Procs ); fprintf( outputFile, "MaxPingPongLatency_usec=%g\n", params->MaxPingPongLatency ); fprintf( outputFile, "RandomlyOrderedRingLatency_usec=%g\n", params->RandomlyOrderedRingLatency ); fprintf( outputFile, "MinPingPongBandwidth_GBytes=%g\n", params->MinPingPongBandwidth ); fprintf( outputFile, "NaturallyOrderedRingBandwidth_GBytes=%g\n", params->NaturallyOrderedRingBandwidth ); fprintf( outputFile, "RandomlyOrderedRingBandwidth_GBytes=%g\n", params->RandomlyOrderedRingBandwidth ); fprintf( outputFile, "MinPingPongLatency_usec=%g\n", params->MinPingPongLatency ); fprintf( outputFile, "AvgPingPongLatency_usec=%g\n", params->AvgPingPongLatency ); fprintf( outputFile, "MaxPingPongBandwidth_GBytes=%g\n", params->MaxPingPongBandwidth ); fprintf( outputFile, "AvgPingPongBandwidth_GBytes=%g\n", params->AvgPingPongBandwidth ); fprintf( outputFile, "NaturallyOrderedRingLatency_usec=%g\n", params->NaturallyOrderedRingLatency ); fprintf( outputFile, "FFTEnblk=%d\n", params->FFTEnblk ); fprintf( outputFile, "FFTEnp=%d\n", params->FFTEnp ); fprintf( outputFile, "FFTEl2size=%d\n", params->FFTEl2size ); #ifdef _OPENMP fprintf( outputFile, "M_OPENMP=%ld\n", (long)(_OPENMP) ); #pragma omp parallel { #pragma omp single nowait { fprintf( 
outputFile, "omp_get_num_threads=%d\n", omp_get_num_threads() ); fprintf( outputFile, "omp_get_max_threads=%d\n", omp_get_max_threads() ); fprintf( outputFile, "omp_get_num_procs=%d\n", omp_get_num_procs() ); } } #else fprintf( outputFile, "M_OPENMP=%ld\n", -1L ); fprintf( outputFile, "omp_get_num_threads=%d\n", 0 ); fprintf( outputFile, "omp_get_max_threads=%d\n", 0 ); fprintf( outputFile, "omp_get_num_procs=%d\n", 0 ); #endif fprintf( outputFile, "MemProc=%g\n", HPCC_MemProc ); fprintf( outputFile, "MemSpec=%d\n", HPCC_MemSpec ); fprintf( outputFile, "MemVal=%g\n", HPCC_MemVal ); for (i = 0; i < MPIFFT_TIMING_COUNT - 1; i++) fprintf( outputFile, "MPIFFT_time%d=%g\n", i, params->MPIFFTtimingsForward[i+1] - params->MPIFFTtimingsForward[i] ); /* CPS: C Preprocessor Symbols */ i = 0; #ifdef HPCC_FFT_235 i = 1; #endif fprintf( outputFile, "CPS_HPCC_FFT_235=%d\n", i ); i = 0; #ifdef HPCC_FFTW_ESTIMATE i = 1; #endif fprintf( outputFile, "CPS_HPCC_FFTW_ESTIMATE=%d\n", i ); i = 0; #ifdef HPCC_MEMALLCTR i = 1; #endif fprintf( outputFile, "CPS_HPCC_MEMALLCTR=%d\n", i ); i = 0; #ifdef HPL_USE_GETPROCESSTIMES i = 1; #endif fprintf( outputFile, "CPS_HPL_USE_GETPROCESSTIMES=%d\n", i ); i = 0; #ifdef RA_SANDIA_NOPT i = 1; #endif fprintf( outputFile, "CPS_RA_SANDIA_NOPT=%d\n", i ); i = 0; #ifdef RA_SANDIA_OPT2 i = 1; #endif fprintf( outputFile, "CPS_RA_SANDIA_OPT2=%d\n", i ); i = 0; #ifdef USING_FFTW i = 1; #endif fprintf( outputFile, "CPS_USING_FFTW=%d\n", i ); fprintf( outputFile, "End of Summary section.%s\n", "" ); fprintf( outputFile, "########################################################################\n" ); fprintf( outputFile, "End of HPC Challenge tests.\n" ); fprintf( outputFile, "Current time (%ld) is %s\n",(long)currentTime,ctime(¤tTime)); fprintf( outputFile, "########################################################################\n" ); END_IO( myRank, outputFile ); return 0; } int HPCC_LocalVectorSize(HPCC_Params *params, int vecCnt, size_t size, int pow2) { 
int flg2, maxIntBits2; /* this is the maximum power of 2 that that can be held in a signed integer (for a 4-byte integer, 2**31-1 is the maximum integer, so the maximum power of 2 is 30) */ maxIntBits2 = sizeof(int) * 8 - 2; /* flg2 = floor(log2(params->HPLMaxProcMem / size / vecCnt)) */ for (flg2 = 1; params->HPLMaxProcMem / size / vecCnt >> flg2; ++flg2) ; /* EMPTY */ --flg2; if (flg2 <= maxIntBits2) { if (pow2) return 1 << flg2; return params->HPLMaxProcMem / size / vecCnt; } return 1 << maxIntBits2; } int HPCC_ProcessGrid(int *P, int *Q, MPI_Comm comm) { int myRank, commSize; int i, p, q, nproc; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); for (nproc = commSize; ; --nproc) { /* this loop makes at most two iterations */ for (i = (int)sqrt( nproc ); i > 1; --i) { q = nproc / i; p = nproc / q; if (p * q == nproc) { *P = p; *Q = q; return 0; } } /* if the code gets here `nproc' is small or is a prime */ if (nproc < 20) { /* do 1D grid for small process counts */ *P = 1; *Q = nproc; return 0; } } return 0; } size_t HPCC_Memory(MPI_Comm comm) { int myRank, commSize; int num_threads; char memFile[13] = "hpccmemf.txt"; char buf[HPL_LINE_MAX]; int nbuf = HPL_LINE_MAX; char *sVal; FILE *f; double mult, mval, procMem; size_t rv; mult = 1.0; num_threads = 1; MPI_Comm_size( comm, &commSize ); MPI_Comm_rank( comm, &myRank ); #ifdef _OPENMP #pragma omp parallel { #pragma omp single nowait { num_threads = omp_get_num_threads(); } } #endif if (myRank == 0) { procMem = 64; f = fopen( memFile, "r" ); if (f) { if (fgets( buf, nbuf, f )) { if (strncmp( "Total=", buf, 6 ) == 0) { mult = 1.0 / commSize; sVal = buf + 6; HPCC_MemSpec = 1; } else if (strncmp( "Thread=", buf, 7 ) == 0) { mult = num_threads; sVal = buf + 7; HPCC_MemSpec = 2; } else if (strncmp( "Process=", buf, 8 ) == 0) { mult = 1.0; sVal = buf + 8; HPCC_MemSpec = 3; } else sVal = NULL; if (sVal && 1 == sscanf( sVal, "%lf", &mval )) { procMem = mval * mult; HPCC_MemVal = mval; } } fclose( f ); } } 
MPI_Bcast( &procMem, 1, MPI_DOUBLE, 0, comm ); rv = procMem; rv *= 1024; rv *= 1024; HPCC_MemProc = procMem; return rv; } int HPCC_Defaults(HPL_T_test *TEST, int *NS, int *N, int *NBS, int *NB, HPL_T_ORDER *PMAPPIN, int *NPQS, int *P, int *Q, int *NPFS, HPL_T_FACT *PF, int *NBMS, int *NBM, int *NDVS, int *NDV, int *NRFS, HPL_T_FACT *RF, int *NTPS, HPL_T_TOP *TP, int *NDHS, int *DH, HPL_T_SWAP *FSWAP, int *TSWAP, int *L1NOTRAN, int *UNOTRAN, int *EQUIL, int *ALIGN, MPI_Comm comm) { int nb = 80; double memFactor = 0.8; *NS = *NBS = *NPQS = *NPFS = *NBMS = *NDVS = *NRFS = *NTPS = *NDHS = 1; TEST->thrsh = 16.0; *NB = nb; *PMAPPIN = HPL_COLUMN_MAJOR; HPCC_ProcessGrid( P, Q, comm ); *N = (int)sqrt( memFactor * (double)(*P * *Q) * (double)(HPCC_Memory( comm ) / sizeof(double)) ) / (2 * nb); *N *= 2*nb; /* make N multiple of 2*nb so both HPL and PTRANS see matrix dimension divisible by nb */ *PF = HPL_RIGHT_LOOKING; *NBM = 4; *NDV = 2; *RF = HPL_CROUT; *TP = HPL_1RING_M; *DH = 1; *FSWAP = HPL_SW_MIX; *TSWAP = 64; *L1NOTRAN = 0; *UNOTRAN = 0; *EQUIL = 1; *ALIGN = 8; return 0; } #ifdef XERBLA_MISSING #ifdef Add_ #define F77xerbla xerbla_ #endif #ifdef Add__ #define F77xerbla xerbla__ #endif #ifdef NoChange #define F77xerbla xerbla #endif #ifdef UpCase #define F77xerbla XERBLA #endif #ifdef f77IsF2C #define F77xerbla xerbla_ #endif void F77xerbla(char *srname, F77_INTEGER *info, long srname_len) { /* int i; char Cname[7]; for (i = 0; i < 6; i++) Cname[i] = srname[i]; Cname[6] = 0; printf("xerbla(%d)\n", *info); */ printf("xerbla()\n"); fflush(stdout); } #endif #ifdef HPCC_MEMALLCTR #define MEM_MAXCNT 7 typedef double Mem_t; static Mem_t *Mem_base; static size_t Mem_dsize; /* Each entry can be in one of three states: 1. Full (holds a block of allocated memory) if: ptr != NULL; size > 0; free == 0 2. 
#ifdef HPCC_MEMALLCTR
#define MEM_MAXCNT 7

/* Allocation granule: all sizes below are counted in units of Mem_t. */
typedef double Mem_t;

static Mem_t *Mem_base;   /* start of the arena obtained from malloc() */
static size_t Mem_dsize;  /* arena size, in Mem_t units */

/* Each entry can be in one of three states:
   1. Full  (holds a block of allocated memory) if:
      ptr != NULL; size > 0; free == 0
   2. Free  (holds block of unallocated memory) if:
      ptr != NULL; free = 1
   3. Empty (doesn't hold a block of memory) if:
      ptr == NULL; free = 1
 */
typedef struct {
  Mem_t *Mem_ptr;
  size_t Mem_size;
  int Mem_free;
} Mem_entry_t;

static Mem_entry_t Mem_blocks[MEM_MAXCNT];

/* Marks entry `idx` as empty; passing MEM_MAXCNT empties every entry. */
static void
HPCC_alloc_set_empty(int idx) {
  int i, n0, n;

  if (MEM_MAXCNT == idx) {
    n0 = 0;
    n = idx;
  } else {
    n0 = idx;
    n = idx + 1;
  }

  /* initialize all blocks to empty */
  for (i = n0; i < n; ++i) {
    Mem_blocks[i].Mem_ptr = (Mem_t *)(NULL);
    Mem_blocks[i].Mem_size = 0;
    Mem_blocks[i].Mem_free = 1;
  }
}

/* Records in entry `idx` a free block of `size` Mem_t units starting at `dptr`. */
static void
HPCC_alloc_set_free(int idx, Mem_t *dptr, size_t size) {
  Mem_blocks[idx].Mem_ptr = dptr;
  Mem_blocks[idx].Mem_size = size;
  Mem_blocks[idx].Mem_free = 1;
}

/*
 * Obtains an arena of at least `total_size` bytes from malloc() and
 * registers it as a single free block.
 * Returns 0 on success, -1 when the arena could not be allocated (the block
 * table is then left entirely empty, so HPCC_malloc() returns NULL).
 */
int
HPCC_alloc_init(size_t total_size) {
  size_t dsize;

  Mem_dsize = dsize = Mceil( total_size, sizeof(Mem_t) );

  Mem_base = (Mem_t *)malloc( dsize * sizeof(Mem_t) );

  HPCC_alloc_set_empty( MEM_MAXCNT );

  if (Mem_base) {
    HPCC_alloc_set_free( 0, Mem_base, dsize );
    return 0;
  }

  return -1;
}

/*
 * Releases the arena and empties the block table.  Always returns 0.
 * Mem_base is reset so that calling this routine again (or without a prior
 * successful HPCC_alloc_init()) does not free the same memory twice.
 */
int
HPCC_alloc_finalize(void) {
  free( Mem_base );
  Mem_base = NULL;
  HPCC_alloc_set_empty( MEM_MAXCNT );
  return 0;
}

/*
 * Best-fit allocation of `size` bytes from the arena.
 *
 * The smallest free block that is large enough is chosen.  If an empty table
 * entry is available the unused remainder is split off into a new free
 * block; otherwise the whole block is handed out.  Returns NULL when no free
 * block is large enough.
 */
void *
HPCC_malloc(size_t size) {
  size_t dsize, diff_size, cur_diff_size;
  int i, cur_best, cur_free;

  dsize = Mceil( size, sizeof(Mem_t) );

  /* larger than any possible remainder, so the first fit always wins */
  cur_diff_size = Mem_dsize + 1;
  cur_free = cur_best = MEM_MAXCNT;

  for (i = 0; i < MEM_MAXCNT; ++i) {
    /* skip full spots */
    if (! Mem_blocks[i].Mem_free)
      continue;

    /* find empty spot */
    if (! Mem_blocks[i].Mem_ptr) {
      cur_free = i;
      continue;
    }

    /* too small: test before subtracting so the unsigned difference
       never wraps around */
    if (Mem_blocks[i].Mem_size < dsize)
      continue;

    diff_size = Mem_blocks[i].Mem_size - dsize;
    if (diff_size < cur_diff_size) {
      /* a match that's the best (so far) was found */
      cur_diff_size = diff_size;
      cur_best = i;
    }
  }

  /* no free block is large enough */
  if (MEM_MAXCNT == cur_best)
    return NULL;

  if (cur_free < MEM_MAXCNT && cur_diff_size > 0) {
    /* create a new free block */
    HPCC_alloc_set_free( cur_free, Mem_blocks[cur_best].Mem_ptr + dsize,
                         cur_diff_size );

    Mem_blocks[cur_best].Mem_size = dsize; /* shrink the best match */
  }

  Mem_blocks[cur_best].Mem_free = 0;

  return (void *)(Mem_blocks[cur_best].Mem_ptr);
}

/*
 * Returns a block obtained from HPCC_malloc() to the arena and coalesces
 * adjacent free blocks.
 *
 * A pointer that is not the start of an allocated block is reported through
 * HPL_pabort(): as a repeated free when it matches the start of a block that
 * is currently free, as an unknown pointer otherwise (this includes NULL).
 * The block table is never touched in either case.
 */
void
HPCC_free(void *ptr) {
  Mem_t *dptr = (Mem_t *)ptr;
  int cur_blk = MEM_MAXCNT, already_free = 0, made_changes, i, j;

  /* look for the block being freed; empty entries have a NULL pointer and
     must not be mistaken for a match when ptr is NULL */
  if (dptr) {
    for (i = 0; i < MEM_MAXCNT; ++i) {
      if (Mem_blocks[i].Mem_ptr != dptr)
        continue;

      if (Mem_blocks[i].Mem_free) {
        /* remember it, but keep looking: a zero-sized allocation shares
           its address with the free block that follows it */
        already_free = 1;
        continue;
      }

      cur_blk = i;
      break;
    }
  }

  if (MEM_MAXCNT == cur_blk) {
    if (already_free) {
      /* double-free causes abort */
      HPL_pabort( __LINE__, "HPCC_free", "Second call to HPCC_free() with the same pointer." );
    } else {
      /* not finding the pointer (including NULL) causes abort */
      HPL_pabort( __LINE__, "HPCC_free", "Unknown pointer in HPCC_free()." );
    }
    /* should the abort ever return, do not index the table with MEM_MAXCNT */
    return;
  }

  Mem_blocks[cur_blk].Mem_free = 1;

  /* merge as many blocks as possible */
  for (made_changes = 1; made_changes;) {
    made_changes = 0;

    for (i = 0; i < MEM_MAXCNT; ++i) {

      /* empty or full blocks can't be merged */
      if (! Mem_blocks[i].Mem_free || ! Mem_blocks[i].Mem_ptr)
        continue;

      for (j = 0; j < MEM_MAXCNT; ++j) {

        /* empty or occupied blocks can't be merged */
        if (! Mem_blocks[j].Mem_free || ! Mem_blocks[j].Mem_ptr)
          continue;

        if (Mem_blocks[i].Mem_ptr + Mem_blocks[i].Mem_size == Mem_blocks[j].Mem_ptr) {
          Mem_blocks[i].Mem_size += Mem_blocks[j].Mem_size;

          HPCC_alloc_set_empty( j );

          made_changes = 1;

          /* a zero-sized free block is "adjacent" to itself; it has just
             been retired to empty, so stop scanning with it */
          if (i == j)
            break;
        }
      }
    }
  }
}
#endif
/* -*- mode: C; tab-width: 2; indent-tabs-mode: nil; fill-column: 79; coding: iso-latin-1-unix -*- */

/*
 * HPCC_dweps
 *
 * Halves a trial value, starting from 1.0, until adding it to 1.0 no longer
 * changes the sum in double precision, and returns that value (the first
 * power of two eps with 1.0 + eps == 1.0).
 *
 * The sum is stored in a volatile double before it is compared, so it is
 * rounded to double even when the compiler evaluates floating-point
 * expressions in a wider format or optimises this file.
 */
double
HPCC_dweps(void) {
  const double one = 1.0, half = one / 2.0;
  double eps = one;
  volatile double sum = one + eps;

  while (sum != one) {
    eps *= half;
    sum = one + eps;
  }

  return eps;
}

/*
 * HPCC_sweps
 *
 * Single-precision counterpart of HPCC_dweps(): returns the first power of
 * two eps for which 1.0f + eps == 1.0f once the sum is rounded to float.
 * The volatile float forces that rounding on every iteration.
 */
float
HPCC_sweps(void) {
  const float one = 1.0f, half = one / 2.0f;
  float eps = one;
  volatile float sum = one + eps;

  while (sum != one) {
    eps *= half;
    sum = one + eps;
  }

  return eps;
}