1、linpackdoclinpack/*Translated to C by Bonnie Toy 5/88 - modified on 2/25/94 to fix a problem with daxpy for unequal increments or equal increments not equal to 1. Jack Dongarra - modified on 08/27/09 fix typo line 270, plus set ix to 0 in the case incx is not 1 Julie LangouTo compile single precisio
2、n version for Sun-4: cc -DSP -O4 -fsingle -fsingle2 clinpack.c -lmTo compile double precision version for Sun-4: cc -DDP -O4 clinpack.c -lmTo obtain rolled source BLAS, add -DROLL to #define ONE 1.0e0#define PREC Double #endif#define NTIMES 10#ifdef ROLL#define ROLLING Rolled #endif#ifdef UNROLL#def
3、ine ROLLING Unrolled #endif#include #include static REAL time99;main () static REAL aa200200,a200201,b200,x200; REAL cray,ops,total,norma,normx; REAL resid,residn,eps,t1,tm,tm2; REAL epslon(),second(),kf; static int ipvt200,n,i,ntimes,info,lda,ldaa,kflops; lda = 201; ldaa = 200; cray = .056; n = 100
4、; fprintf(stdout,ROLLING);fprintf(stdout,PREC);fprintf(stdout,Precision Linpacknn); fprintf(stderr,ROLLING);fprintf(stderr,PREC);fprintf(stderr,Precision Linpacknn); ops = (2.0e0*(n*n*n)/3.0 + 2.0*(n*n); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time00 = second() - t1; t1 =
5、 second(); dgesl(a,lda,n,ipvt,b,0); time10 = second() - t1; total = time00 + time10;/* compute a residual to verify results. */ for (i = 0; i n; i+) xi = bi; matgen(a,lda,n,b,&norma); for (i = 0; i n; i+) bi = -bi; dmxpy(n,b,n,lda,x,a); resid = 0.0; normx = 0.0; for (i = 0; i fabs(double)bi) ? resid
6、 : fabs(double)bi); normx = (normx fabs(double)xi) ? normx : fabs(double)xi); eps = epslon(REAL)ONE); residn = resid/( n*norma*normx*eps ); printf( norm. resid resid machep); printf( x0-1 xn-1-1n); printf( %8.1f %16.8e%16.8e%16.8e%16.8en, (double)residn, (double)resid, (double)eps, (double)x0-1, (do
7、uble)xn-1-1); fprintf(stderr, times are reported for matrices of order %5dn,n); fprintf(stderr, dgefa dgesl total kflops unit); fprintf(stderr, ration); time20 = total; time30 = ops/(1.0e3*total); time40 = 2.0e3/time30; time50 = total/cray; fprintf(stderr, times for array with leading dimension of%5
8、dn,lda); print_time(0); matgen(a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time01 = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time11 = second() - t1; total = time01 + time11; time21 = total; time31 = ops/(1.0e3*total); time41 = 2.0e3/time31; time51 = total/cray; matgen(
9、a,lda,n,b,&norma); t1 = second(); dgefa(a,lda,n,ipvt,&info); time02 = second() - t1; t1 = second(); dgesl(a,lda,n,ipvt,b,0); time12 = second() - t1; total = time02 + time12; time22 = total; time32 = ops/(1.0e3*total); time42 = 2.0e3/time32; time52 = total/cray; ntimes = NTIMES; tm2 = 0.0; t1 = secon
10、d(); for (i = 0; i ntimes; i+) tm = second(); matgen(a,lda,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(a,lda,n,ipvt,&info); time03 = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i ntimes; i+) dgesl(a,lda,n,ipvt,b,0); time13 = (second() - t1)/ntimes; total = time03 + time13; time23 = to
11、tal; time33 = ops/(1.0e3*total); time43 = 2.0e3/time33; time53 = total/cray; print_time(1); print_time(2); print_time(3); matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time04 = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time14 = second() - t1; total = time04 +
12、 time14; time24 = total; time34 = ops/(1.0e3*total); time44 = 2.0e3/time34; time54 = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time05 = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time15 = second() - t1; total = time05 + time15; time25 = total;
13、time35 = ops/(1.0e3*total); time45 = 2.0e3/time35; time55 = total/cray; matgen(aa,ldaa,n,b,&norma); t1 = second(); dgefa(aa,ldaa,n,ipvt,&info); time06 = second() - t1; t1 = second(); dgesl(aa,ldaa,n,ipvt,b,0); time16 = second() - t1; total = time06 + time16; time26 = total; time36 = ops/(1.0e3*total
14、); time46 = 2.0e3/time36; time56 = total/cray; ntimes = NTIMES; tm2 = 0; t1 = second(); for (i = 0; i ntimes; i+) tm = second(); matgen(aa,ldaa,n,b,&norma); tm2 = tm2 + second() - tm; dgefa(aa,ldaa,n,ipvt,&info); time07 = (second() - t1 - tm2)/ntimes; t1 = second(); for (i = 0; i ntimes; i+) dgesl(a
15、a,ldaa,n,ipvt,b,0); time17 = (second() - t1)/ntimes; total = time07 + time17; time27 = total; time37 = ops/(1.0e3*total); time47 = 2.0e3/time37; time57 = total/cray; /* the following code sequence implements the semantics of the Fortran intrinsics nint(min(time33,time37) */ kf = (time33 ZERO) ? (kf
16、+ .5) : (kf - .5); if (fabs(double)kf) ONE) kflops = 0; else kflops = floor(fabs(double)kf); if (kf ZERO) kflops = -kflops; fprintf(stderr, times for array with leading dimension of%4dn,ldaa); print_time(4); print_time(5); print_time(6); print_time(7); fprintf(stderr,ROLLING);fprintf(stderr,PREC); f
17、printf(stderr, Precision %5d Kflops ; %d Reps n,kflops,NTIMES); /*-*/ print_time (row)int row;fprintf(stderr,%11.2f%11.2f%11.2f%11.0f%11.2f%11.2fn, (double)time0row, (double)time1row, (double)time2row, (double)time3row, (double)time4row, (double)time5row); /*-*/ matgen(a,lda,n,b,norma)REAL a,b,*norm
18、a;int lda, n;/* We would like to declare alda, but c does not allow it. In thisfunction, references to aij are written alda*j+i. */ int init, i, j; init = 1325; *norma = 0.0; for (j = 0; j n; j+) for (i = 0; i *norma) ? alda*j+i : *norma; for (i = 0; i n; i+) bi = 0.0; for (j = 0; j n; j+) for (i =
19、0; i = 0) for (k = 0; k nm1; k+) kp1 = k + 1; /* find l = pivot index */ l = idamax(n-k,&alda*k+k,1) + k; ipvtk = l; /* zero pivot implies this column already triangularized */ if (alda*k+l != ZERO) /* interchange if necessary */ if (l != k) t = alda*k+l; alda*k+l = alda*k+k; alda*k+k = t; /* comput
20、e multipliers */ t = -ONE/alda*k+k; dscal(n-(k+1),t,&alda*k+k+1,1); /* row elimination with column indexing */ for (j = kp1; j n; j+) t = alda*j+l; if (l != k) alda*j+l = alda*j+k; alda*j+k = t; daxpy(n-(k+1),t,&alda*k+k+1,1, &alda*j+k+1,1); else *info = k; ipvtn-1 = n-1; if (alda*(n-1)+(n-1) = ZERO
21、) *info = n-1;/*-*/ dgesl(a,lda,n,ipvt,b,job)int lda,n,ipvt,job;REAL a,b;/* We would like to declare alda, but c does not allow it. In thisfunction, references to aij are written alda*i+j. */* dgesl solves the double precision system a * x = b or trans(a) * x = b using the factors computed by dgeco
22、or dgefa. on entry a double precisionnlda the output from dgeco or dgefa. lda integer the leading dimension of the array a . n integer the order of the matrix a . ipvt integern the pivot vector from dgeco or dgefa. b double precisionn the right hand side vector. job integer = 0 to solve a*x = b , = nonzero to solve trans(a)*x = b where trans(a) is the transpose. on return b the solution vector x . error condition a division by zero will occur if the input factor contains a zero on the diagonal. technically this indicates singularity
copyright@ 2008-2022 冰豆网网站版权所有
经营许可证编号:鄂ICP备2022015515号-1