Fast Auxiliary Space Preconditioning  1.8.4 Feb/15/2016
smoother_csr_poly.c
Go to the documentation of this file.
1 
10 #include <math.h>
11 #include <time.h>
12 #include <float.h>
13 #include <limits.h>
14 
15 #ifdef _OPENMP
16 #include <omp.h>
17 #endif
18 
19 #include "fasp.h"
20 #include "fasp_functs.h"
21 
22 static void bminax(REAL *,INT *,INT *, REAL *, REAL *,INT *, REAL *);
23 static void Diaginv(dCSRmat *, REAL *);
24 static REAL DinvAnorminf(dCSRmat *, REAL *);
25 static void Diagx(REAL *, INT, REAL *, REAL *);
26 static void Rr(dCSRmat *, REAL *, REAL *, REAL *, REAL *, REAL *, REAL *, REAL *, INT);
27 
28 /*---------------------------------*/
29 /*-- Public Function --*/
30 /*---------------------------------*/
31 
49  dvector *brhs,
50  dvector *usol,
51  INT n,
52  INT ndeg,
53  INT L)
54 {
55  // local variables
56  INT i;
57  REAL *b = brhs->val, *u = usol->val;
58  REAL *Dinv = NULL, *r = NULL, *rbar = NULL, *v0 = NULL, *v1 = NULL;
59  REAL *error = NULL, *k = NULL;
60  REAL mu0, mu1, smu0, smu1;
61 
62  /* allocate memory */
63  Dinv = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
64  r = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
65  rbar = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
66  v0 = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
67  v1 = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
68  error = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
69  k = (REAL *) fasp_mem_calloc(6,sizeof(REAL)); // coefficients for calculation
70 
71 
72  // get the inverse of the diagonal of A
73  Diaginv(Amat, Dinv);
74 
75  // set up parameter
76  mu0 = DinvAnorminf(Amat, Dinv); // get the inf norm of Dinv*A;
77 
78  mu0 = 1.0/mu0; mu1 = 4.0*mu0; // default set 8;
79  smu0 = sqrt(mu0); smu1 = sqrt(mu1);
80 
81  k[1] = (mu0+mu1)/2.0;
82  k[2] = (smu0 + smu1)*(smu0 + smu1)/2.0;
83  k[3] = mu0 * mu1;
84 
85  // 4.0*mu0*mu1/(sqrt(mu0)+sqrt(mu1))/(sqrt(mu0)+sqrt(mu1));
86  k[4] = 2.0*k[3]/k[2];
87 
88  // square of (sqrt(kappa)-1)/(sqrt(kappa)+1);
89  k[5] = (mu1-2.0*smu0*smu1+mu0)/(mu1+2.0*smu0*smu1+mu0);
90 
91 #if DEBUG_MODE > 0
92  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
93 #endif
94 
95  // Update
96  for ( i=0; i<L; i++ ) {
97  // get residual
98  fasp_blas_dcsr_mxv(Amat, u, r);// r= Amat*u;
99  fasp_blas_array_axpyz(n, -1, r, b, r);// r= -r+b;
100 
101  // Get correction error = R*r
102  Rr(Amat, Dinv, r, rbar, v0, v1, error, k, ndeg);
103 
104  // update solution
105  fasp_blas_array_axpy(n, 1, error, u);
106 
107  }
108 
109 #if DEBUG_MODE > 1
110  printf("### DEBUG: Degree of polysmoothing is: %d\n",ndeg);
111 #endif
112 
113  // free memory
114  fasp_mem_free(Dinv);
115  fasp_mem_free(r);
116  fasp_mem_free(rbar);
117  fasp_mem_free(v0);
118  fasp_mem_free(v1);
119  fasp_mem_free(error);
120  fasp_mem_free(k);
121 
122 #if DEBUG_MODE > 0
123  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
124 #endif
125 
126  return;
127 
128 }
129 
149  dvector *brhs,
150  dvector *usol,
151  INT n,
152  INT ndeg,
153  INT L)
154 {
155  INT *ia=Amat->IA,*ja=Amat->JA;
156  INT i,j,k,it,jk,iaa,iab,ndeg0; // id and ij for scaling of A
157 
158  REAL *a=Amat->val, *b=brhs->val, *u=usol->val;
159  REAL *v,*v0,*r,*vsave; // one can get away without r as well;
160  REAL smaxa,smina,delinv,s,smu0,smu1,skappa,th,th1,sq;
161  REAL ri,ari,vj,ravj,snj,sm,sm01,smsqrt,delta,delta2,chi;
162 
163 #ifdef _OPENMP
164  // variables for OpenMP
165  INT myid, mybegin, myend;
166  INT nthreads = FASP_GET_NUM_THREADS();
167 #endif
168 
169 #if DEBUG_MODE > 0
170  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
171 #endif
172 
173  /* WORKING MEM */
174  v = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
175  v0 = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
176  vsave = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
177  r = (REAL *) fasp_mem_calloc(n,sizeof(REAL));
178 
179  /* COMPUTE PARAMS*/
180  // min INT for approx -- could be done upfront
181  // i.e., only once per level... only norm1 ...
182  fasp_aux_norm1_(ia,ja,a,&n,&smaxa);
183  smina=smaxa/8;
184  delinv=(smaxa+smina)/(smaxa-smina);
185  th=delinv+sqrt(delinv*delinv-1e+00);
186  th1=1e+00/th;
187  sq=(th-th1)*(th-th1);
188  //
189  ndeg0=floor(log(2*(2e0+th+th1)/sq)/log(th)+1e0);
190  if(ndeg0 < ndeg) ndeg0=ndeg;
191  //
192  smu0=1e+00/smaxa;
193  smu1=1e+00/smina;
194  skappa=sqrt(smaxa/smina);
195  delta=(skappa-1e+00)/(skappa+1);
196  delta2=delta*delta;
197  s=sqrt(smu0)+sqrt(smu1);
198  s=s*s;
199  smsqrt=0.5e+00*s;
200  chi=4e+00*smu0*smu1/s;
201  sm=0.5e+00*(smu0+smu1);
202  sm01=smu0*smu1;
203 
204 #if DEBUG_MODE > 1
205  printf("### DEBUG: Degree of polysmoothing is: %d\n",ndeg);
206 #endif
207 
208  /* BEGIN POLY ITS */
209 
210  /* auv_(ia,ja,a,u,u,&n,&err0); NA: u = 0 */
211  //bminax(b,ia,ja,a,u,&n,r);
212  //for (i=0; i < n; ++i) {res0 += r[i]*r[i];}
213  //res0=sqrt(res0);
214 
215  for (it = 0 ; it < L; it++) {
216  bminax(b,ia,ja,a,u,&n,r);
217 #ifdef _OPENMP
218 #pragma omp parallel for private(myid,mybegin,myend,i,iaa,iab,ari,jk,j,ri) if(n>OPENMP_HOLDS)
219  for (myid=0; myid<nthreads; ++myid) {
220  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
221  for (i=mybegin; i<myend; ++i) {
222 #else
223  for (i=0; i < n ; ++i) {
224 #endif
225  iaa = ia[i];
226  iab = ia[i+1];
227  ari=0e+00; /* ari is (A*r)[i] */
228  if(iab > iaa) {
229  for (jk = iaa; jk < iab; jk++) {
230  j=ja[jk];
231  ari += a[jk] * r[j];
232  }
233  }
234  ri=r[i];
235  v0[i]=sm*ri;
236  v[i]=smsqrt*ri-sm01*ari;
237  }
238 #ifdef _OPENMP
239  }
240 #endif
241  for (i=1; i < ndeg0; ++i) {
242  //for (j=0; j < n ; ++j) vsave[j]=v[j];
243  fasp_array_cp(n, v, vsave);
244 
245 #ifdef _OPENMP
246 #pragma omp parallel for private(myid,mybegin,myend,j,ravj,iaa,iab,jk,k,vj,snj) if(n>OPENMP_HOLDS)
247  for (myid=0; myid<nthreads; ++myid) {
248  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
249  for (j=mybegin; j<myend; ++j) {
250 #else
251  for (j=0; j < n ; ++j) {
252 #endif
253  /* ravj = (r- A*v)[j] */
254  ravj= r[j];
255  iaa = ia[j];
256  iab = ia[j+1];
257  if(iab > iaa) {
258  for (jk = iaa; jk < iab; jk++) {
259  k=ja[jk];
260  ravj -= a[jk] * vsave[k];
261  }
262  }
263  vj=v[j];
264  snj = chi*ravj+delta2*(vj-v0[j]);
265  v0[j]=vj;
266  v[j]=vj+snj;
267  }
268  }
269 #ifdef _OPENMP
270  }
271 #endif
272  fasp_aux_uuplv0_(u,v,&n);
273  //bminax(b,ia,ja,a,u,&n,r);
274  //for (i=0; i < n ; ++i)
275  //resk += r[i]*r[i];
276  //resk=sqrt(resk);
277  //fprintf(stdout,"\nres0=%12.5g\n",res0);
278  //fprintf(stdout,"\nresk=%12.5g\n",resk);
279  //res0=resk;
280  //resk=0.0e0;
281  }
282 
283  fasp_mem_free(v);
284  fasp_mem_free(v0);
285  fasp_mem_free(r);
286  fasp_mem_free(vsave);
287 
288 #if DEBUG_MODE > 0
289  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
290 #endif
291 
292  return;
293 }
294 
295 /*---------------------------------*/
296 /*-- Private Functions --*/
297 /*---------------------------------*/
317 static void bminax(REAL *b,INT *ia,INT *ja, REAL *a, REAL *x,INT *nn, REAL *res)
318 {
319  /* Computes b-A*x */
320 
321  INT i,j,jk,iaa,iab;
322  INT n;
323  REAL u;
324  n=*nn;
325 
326 #ifdef _OPENMP
327  // variables for OpenMP
328  INT myid, mybegin, myend;
329  INT nthreads = FASP_GET_NUM_THREADS();
330 #endif
331 
332 #ifdef _OPENMP
333 #pragma omp parallel for private(myid,mybegin,myend,i,iaa,iab,u,jk,j) if(n>OPENMP_HOLDS)
334  for (myid=0; myid<nthreads; ++myid) {
335  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
336  for (i=mybegin; i<myend; ++i) {
337 #else
338  for (i=0; i < n ; ++i) {
339 #endif
340  iaa = ia[i];
341  iab = ia[i+1];
342  u = b[i];
343  if(iab > iaa)
344  for (jk = iaa; jk < iab; jk++) {
345  j=ja[jk];
346  u -= a[jk] * x[j];
347  }
348  res[i] = u;
349  }
350 #ifdef _OPENMP
351  }
352 #endif
353  return;
354 }
355 
370 static void Diaginv (dCSRmat *Amat, REAL *Dinv)
371 {
372  const INT n = Amat->row;
373  const INT *ia = Amat->IA, *ja = Amat->JA;
374  const REAL *a = Amat->val;
375  INT i,j;
376 
377 #ifdef _OPENMP
378 #pragma omp parallel for private(j) if(n>OPENMP_HOLDS)
379 #endif
380  for (i=0; i<n; i++) {
381  for(j=ia[i]; j<ia[i+1]; j++) {
382  if(i==ja[j]) // find the diagonal
383  break;
384  }
385  Dinv[i] = 1.0/a[j];
386  }
387  return;
388 }
389 
406 static REAL DinvAnorminf(dCSRmat *Amat, REAL *Dinv)
407 {
408  //local variable
409  const INT n = Amat->row;
410  const INT *ia = Amat->IA;
411  const REAL *a = Amat->val;
412  unsigned INT i,j;
413  REAL norm, temp;
414 
415 #ifdef _OPENMP
416  // variables for OpenMP
417  INT myid, mybegin, myend;
418  REAL sub_norm = 0.;
419  INT nthreads = FASP_GET_NUM_THREADS();
420 #endif
421 
422  // get the infinity norm of Dinv*A
423  norm = 0.;
424 #ifdef _OPENMP
425 #pragma omp parallel for private(myid,mybegin,myend,i,temp,sub_norm) if(n>OPENMP_HOLDS)
426  for (myid=0; myid<nthreads; ++myid) {
427  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
428  for (i=mybegin; i<myend; ++i) {
429 #else
430  for (i=0; i<n; i++) {
431 #endif
432  temp=0.;
433  for (j=ia[i]; j<ia[i+1]; j++) {
434  temp += ABS(a[j]);
435  }
436  temp *= Dinv[i]; // temp is the L1 norm of the ith row of Dinv*A;
437 #ifdef _OPENMP
438  sub_norm = MAX(sub_norm, temp);
439 #else
440  norm = MAX(norm, temp);
441 #endif
442  }
443 #ifdef _OPENMP
444 #pragma omp critical(norm)
445  norm = MAX(norm, sub_norm);
446  }
447 #endif
448 
449  return norm;
450 }
451 
468 static void Diagx(REAL *Dinv, INT n, REAL *x, REAL *b)
469 {
470  unsigned INT i;
471 
472  // Variables for OpenMP
473  INT nthreads = 1, use_openmp = FALSE;
474  INT myid, mybegin, myend;
475 
476 #ifdef _OPENMP
477  if (n > OPENMP_HOLDS) {
478  use_openmp = TRUE;
479  nthreads = FASP_GET_NUM_THREADS();
480  }
481 #endif
482 
483  if (use_openmp) {
484 #ifdef _OPENMP
485 #pragma omp parallel for private(myid, mybegin, myend, i)
486 #endif
487  for (myid = 0; myid < nthreads; myid++) {
488  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
489  for (i = mybegin; i < myend; i++) {
490  b[i] = Dinv[i] * x[i];
491  }
492  }
493  }
494  else {
495  for (i=0; i<n; i++) {
496  b[i] = Dinv[i] * x[i];
497  }
498  }
499  return;
500 }
501 
524 static void Rr (dCSRmat *Amat,
525  REAL *Dinv,
526  REAL *r,
527  REAL *rbar,
528  REAL *v0,
529  REAL *v1,
530  REAL *vnew,
531  REAL *k,
532  INT m)
533 {
534  // local variables
535  const INT n = Amat->row;
536  INT i,j;
537 
538 #ifdef _OPENMP
539  // variables for OpenMP
540  INT myid, mybegin, myend;
541  INT nthreads = FASP_GET_NUM_THREADS();
542 #endif
543 
544  //1 set up rbar
545  Diagx(Dinv, n, r, rbar);// rbar = Dinv *r;
546 
547  //2 set up v0, v1;
548  fasp_blas_dcsr_mxv(Amat, rbar, v1);//v1= A*rbar;
549  Diagx(Dinv, n, v1, v1); // v1=Dinv *v1;
550 
551 #ifdef _OPENMP
552 #pragma omp parallel for if(n>OPENMP_HOLDS)
553 #endif
554  for(i=0;i<n;i++) {
555  v0[i] = k[1] * rbar[i];
556  v1[i] = k[2] * rbar[i] - k[3] * v1[i];
557  }
558 
559  //3 iterate to get v_(j+1)
560 
561  for (j=1;j<m;j++) {
562  fasp_blas_dcsr_mxv(Amat, v1, rbar);//rbar= A*v_(j);
563 
564 #ifdef _OPENMP
565 #pragma omp parallel for private(myid,mybegin,myend,i) if(n>OPENMP_HOLDS)
566  for (myid=0; myid<nthreads; ++myid) {
567  FASP_GET_START_END(myid, nthreads, n, &mybegin, &myend);
568  for (i=mybegin; i<myend; ++i) {
569 #else
570  for(i=0;i<n;i++) {
571 #endif
572  rbar[i] = (r[i] - rbar[i])*Dinv[i];// indeed rbar=Dinv*(r-A*v_(j));
573  vnew[i] = v1[i] + k[5] *(v1[i] - v0[i]) + k[4] * rbar[i];// compute v_(j+1)
574  // prepare for next cycle
575  v0[i]=v1[i];
576  v1[i]=vnew[i];
577  }
578 #ifdef _OPENMP
579  }
580 #endif
581  }
582 }
583 
584 /*---------------------------------*/
585 /*-- End of File --*/
586 /*---------------------------------*/
#define TRUE
Definition of logic type.
Definition: fasp_const.h:67
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:163
#define REAL
Definition: fasp.h:67
REAL * val
nonzero entries of A
Definition: fasp.h:166
REAL * val
actual vector entries
Definition: fasp.h:348
void * fasp_mem_calloc(LONGLONG size, INT type)
1M = 1024*1024
Definition: memory.c:60
Vector with n entries of REAL type.
Definition: fasp.h:342
#define INT
Definition: fasp.h:64
void FASP_GET_START_END(INT procid, INT nprocs, INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: threads.c:83
void fasp_mem_free(void *mem)
Free up previous allocated memory body.
Definition: memory.c:150
#define OPENMP_HOLDS
Definition: fasp_const.h:248
Main header file for FASP.
INT row
row number of matrix A, m
Definition: fasp.h:151
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:148
void fasp_blas_array_axpy(const INT n, const REAL a, REAL *x, REAL *y)
y = a*x + y
Definition: blas_array.c:87
#define ABS(a)
Definition: fasp.h:74
void fasp_smoother_dcsr_poly(dCSRmat *Amat, dvector *brhs, dvector *usol, INT n, INT ndeg, INT L)
poly approx to A^{-1} as MG smoother
void fasp_array_cp(const INT n, REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: array.c:165
#define FALSE
Definition: fasp_const.h:68
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:160
void fasp_smoother_dcsr_poly_old(dCSRmat *Amat, dvector *brhs, dvector *usol, INT n, INT ndeg, INT L)
poly approx to A^{-1} as MG smoother: JK<Z2010
void fasp_blas_array_axpyz(const INT n, const REAL a, REAL *x, REAL *y, REAL *z)
z = a*x + y
Definition: blas_array.c:167
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:72
void fasp_blas_dcsr_mxv(dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: blas_csr.c:225