Fast Auxiliary Space Preconditioning  1.8.4 Feb/15/2016
smoother_csr.c
Go to the documentation of this file.
1 
6 #include <math.h>
7 
8 #ifdef _OPENMP
9 #include <omp.h>
10 #endif
11 
12 #include "fasp.h"
13 #include "fasp_functs.h"
14 
15 /*---------------------------------*/
16 /*-- Decoration of Functions --*/
17 /*---------------------------------*/
18 
19 static void swep2db (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f, INT nbegx,
20  INT nbegy, INT *mark, INT nx, INT ny);
21 static void swep3db (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f, INT nbegx,
22  INT nbegy, INT nbegz, INT *mark, INT nx, INT ny, INT nz);
23 static void rb0b2d (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f,
24  INT *mark, INT nx, INT ny, INT nsweeps);
25 static void rb0b3d (INT *ia, INT *ja, REAL *aa,REAL *u, REAL *f,
26  INT *mark, INT nx, INT ny, INT nz, INT nsweeps);
27 static void swep2df (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f, INT nbegx,
28  INT nbegy, INT *mark, INT nx, INT ny);
29 static void swep3df (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f, INT nbegx,
30  INT nbegy, INT nbegz, INT *mark, INT nx, INT ny, INT nz);
31 static void rb0f2d (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f,
32  INT *mark, INT nx, INT ny, INT nsweeps);
33 static void rb0f3d (INT *ia, INT *ja, REAL *aa,REAL *u, REAL *f, INT *mark,
34  INT nx, INT ny, INT nz, INT nsweeps);
35 
36 /*---------------------------------*/
37 /*-- Public Functions --*/
38 /*---------------------------------*/
39 
60  const INT i_1,
61  const INT i_n,
62  const INT s,
63  dCSRmat *A,
64  dvector *b,
65  INT L)
66 {
67  const INT N = ABS(i_n - i_1)+1;
68  const INT *ia=A->IA, *ja=A->JA;
69  const REAL *aj=A->val,*bval=b->val;
70  REAL *uval=u->val;
71  REAL w = 0.8; //0.8
72  // local variables
73  INT i,j,k,begin_row,end_row;
74 
75  // OpenMP variables
76 #ifdef _OPENMP
77  INT myid, mybegin, myend;
78  INT nthreads = FASP_GET_NUM_THREADS();
79 #endif
80 
81  REAL *t = (REAL *)fasp_mem_calloc(N,sizeof(REAL));
82  REAL *d = (REAL *)fasp_mem_calloc(N,sizeof(REAL));
83 
84  while (L--) {
85  if (s>0) {
86 #ifdef _OPENMP
87  if (N > OPENMP_HOLDS) {
88 #pragma omp parallel for private(myid, mybegin, myend, begin_row, end_row, i, k, j)
89  for (myid=0; myid<nthreads; ++myid) {
90  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
91  mybegin += i_1; myend += i_1;
92  for (i=mybegin; i<myend; i+=s) {
93  t[i]=bval[i];
94  begin_row=ia[i],end_row=ia[i+1];
95  for (k=begin_row; k<end_row; ++k) {
96  j=ja[k];
97  if (i!=j) t[i]-=aj[k]*uval[j];
98  else d[i]=aj[k];
99  }
100  }
101  }
102  }
103  else {
104 #endif
105  for (i=i_1;i<=i_n;i+=s) {
106  t[i]=bval[i];
107  begin_row=ia[i],end_row=ia[i+1];
108  for (k=begin_row;k<end_row;++k) {
109  j=ja[k];
110  if (i!=j) t[i]-=aj[k]*uval[j];
111  else d[i]=aj[k];
112  }
113  }
114 #ifdef _OPENMP
115  }
116 #endif
117 
118 #ifdef _OPENMP
119 #pragma omp parallel for private (i)
120 #endif
121  for (i=i_1;i<=i_n;i+=s) {
122  //if (ABS(d[i])>SMALLREAL) uval[i]= w*t[i]/d[i];
123  if (ABS(d[i])>SMALLREAL) uval[i]=(1-w)*uval[i]+ w*t[i]/d[i];
124  }
125  }
126  else {
127 #ifdef _OPENMP
128  if (N > OPENMP_HOLDS) {
129 #pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, k, j)
130  for (myid=0; myid<nthreads; myid++) {
131  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
132  mybegin = i_1-mybegin; myend = i_1-myend;
133  for (i=mybegin; i>myend; i+=s) {
134  t[i]=bval[i];
135  begin_row=ia[i],end_row=ia[i+1];
136  for (k=begin_row; k<end_row; ++k) {
137  j=ja[k];
138  if (i!=j) t[i]-=aj[k]*uval[j];
139  else d[i]=aj[k];
140  }
141  }
142  }
143  }
144  else {
145 #endif
146  for (i=i_1;i>=i_n;i+=s) {
147  t[i]=bval[i];
148  begin_row=ia[i],end_row=ia[i+1];
149  for (k=begin_row;k<end_row;++k) {
150  j=ja[k];
151  if (i!=j) t[i]-=aj[k]*uval[j];
152  else d[i]=aj[k];
153  }
154  }
155 #ifdef _OPENMP
156  }
157 #endif
158 
159 #ifdef _OPENMP
160 #pragma omp parallel for private(i)
161 #endif
162  for (i=i_1;i>=i_n;i+=s) {
163  //if (ABS(d[i])>SMALLREAL) uval[i]=t[i]/d[i];
164  if (ABS(d[i])>SMALLREAL) uval[i]=(1-w)*uval[i]+ w*t[i]/d[i];
165  }
166  }
167 
168  } // end while
169 
170  fasp_mem_free(t);
171  fasp_mem_free(d);
172 
173  return;
174 }
175 
196  const INT i_1,
197  const INT i_n,
198  const INT s,
199  dCSRmat *A,
200  dvector *b,
201  INT L)
202 {
203  const INT *ia=A->IA,*ja=A->JA;
204  const REAL *aj=A->val,*bval=b->val;
205  REAL *uval=u->val;
206 
207  // local variables
208  INT i,j,k,begin_row,end_row;
209  REAL t,d=0.0;
210 
211 #ifdef _OPENMP
212  // variables for OpenMP
213  const INT N = ABS(i_n - i_1)+1;
214  INT myid, mybegin, myend;
215  INT nthreads = FASP_GET_NUM_THREADS();
216 #endif
217 
218  if (s > 0) {
219  while (L--) {
220 #ifdef _OPENMP
221  if (N >OPENMP_HOLDS) {
222 #pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, d, k, j)
223  for (myid=0; myid<nthreads; myid++) {
224  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
225  mybegin += i_1, myend += i_1;
226  for (i=mybegin; i<myend; i+=s) {
227  t = bval[i];
228  begin_row=ia[i],end_row=ia[i+1];
229 #if DIAGONAL_PREF // diagonal first
230  d=aj[begin_row];
231  if (ABS(d)>SMALLREAL) {
232  for (k=begin_row+1;k<end_row;++k) {
233  j=ja[k];
234  t-=aj[k]*uval[j];
235  }
236  uval[i]=t/d;
237  }
238 #else // general order
239  for (k=begin_row;k<end_row;++k) {
240  j=ja[k];
241  if (i!=j)
242  t-=aj[k]*uval[j];
243  else if (ABS(aj[k])>SMALLREAL) d=1.e+0/aj[k];
244  }
245  uval[i]=t*d;
246 #endif
247  } // end for i
248  }
249  }
250  else {
251 #endif
252  for (i=i_1;i<=i_n;i+=s) {
253  t = bval[i];
254  begin_row=ia[i],end_row=ia[i+1];
255 
256 #if DIAGONAL_PREF // diagonal first
257  d=aj[begin_row];
258  if (ABS(d)>SMALLREAL) {
259  for (k=begin_row+1;k<end_row;++k) {
260  j=ja[k];
261  t-=aj[k]*uval[j];
262  }
263  uval[i]=t/d;
264  }
265 #else // general order
266  for (k=begin_row;k<end_row;++k) {
267  j=ja[k];
268  if (i!=j)
269  t-=aj[k]*uval[j];
270  else if (ABS(aj[k])>SMALLREAL) d=1.e+0/aj[k];
271  }
272  uval[i]=t*d;
273 #endif
274  } // end for i
275 #ifdef _OPENMP
276  }
277 #endif
278  } // end while
279 
280  } // if s
281  else {
282 
283  while (L--) {
284 #ifdef _OPENMP
285  if (N > OPENMP_HOLDS) {
286 #pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, d, k, j, t)
287  for (myid=0; myid<nthreads; myid++) {
288  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
289  mybegin = i_1 - mybegin; myend = i_1 - myend;
290  for (i=mybegin; i>myend; i+=s) {
291  t=bval[i];
292  begin_row=ia[i],end_row=ia[i+1];
293 #if DIAGONAL_PREF // diagonal first
294  d=aj[begin_row];
295  if (ABS(d)>SMALLREAL) {
296  for (k=begin_row+1;k<end_row;++k) {
297  j=ja[k];
298  t-=aj[k]*uval[j];
299  }
300  uval[i]=t/d;
301  }
302 #else // general order
303  for (k=begin_row;k<end_row;++k) {
304  j=ja[k];
305  if (i!=j)
306  t-=aj[k]*uval[j];
307  else if (ABS(aj[k])>SMALLREAL) d=1.0/aj[k];
308  }
309  uval[i]=t*d;
310 #endif
311  } // end for i
312  }
313  }
314  else {
315 #endif
316  for (i=i_1;i>=i_n;i+=s) {
317  t=bval[i];
318  begin_row=ia[i],end_row=ia[i+1];
319 #if DIAGONAL_PREF // diagonal first
320  d=aj[begin_row];
321  if (ABS(d)>SMALLREAL) {
322  for (k=begin_row+1;k<end_row;++k) {
323  j=ja[k];
324  t-=aj[k]*uval[j];
325  }
326  uval[i]=t/d;
327  }
328 #else // general order
329  for (k=begin_row;k<end_row;++k) {
330  j=ja[k];
331  if (i!=j)
332  t-=aj[k]*uval[j];
333  else if (ABS(aj[k])>SMALLREAL) d=1.0/aj[k];
334  }
335  uval[i]=t*d;
336 #endif
337  } // end for i
338 #ifdef _OPENMP
339  }
340 #endif
341  } // end while
342  } // end if
343  return;
344 }
345 
365  dCSRmat *A,
366  dvector *b,
367  INT L,
368  INT *mark,
369  const INT order)
370 {
371  const INT nrow = b->row; // number of rows
372  const INT *ia = A->IA, *ja = A->JA;
373  const REAL *aj = A->val, *bval = b->val;
374  REAL *uval = u->val;
375 
376  INT i,j,k,begin_row,end_row;
377  REAL t,d=0.0;
378 
379 #ifdef _OPENMP
380  INT myid,mybegin,myend;
381  INT nthreads = FASP_GET_NUM_THREADS();
382 #endif
383 
384  // F-point first, C-point second
385  if (order == FPFIRST) {
386 
387  while (L--) {
388 
389 #ifdef _OPENMP
390  if (nrow > OPENMP_HOLDS) {
391 #pragma omp parallel for private(myid, mybegin, myend, i,t,begin_row,end_row,k,j,d)
392  for (myid = 0; myid < nthreads; myid ++) {
393  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
394  for (i=mybegin; i<myend; i++) {
395  if (mark[i] != 1) {
396  t = bval[i];
397  begin_row = ia[i], end_row = ia[i+1];
398 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
399  d = aj[begin_row];
400  for (k = begin_row+1; k < end_row; k ++) {
401  j = ja[k];
402  t -= aj[k]*uval[j];
403  } // end for k
404 #else
405  for (k = begin_row; k < end_row; k ++) {
406  j = ja[k];
407  if (i!=j) t -= aj[k]*uval[j];
408  else d = aj[k];
409  } // end for k
410 #endif // end if DIAG_PREF
411  if (ABS(d) > SMALLREAL) uval[i] = t/d;
412  }
413  } // end for i
414  }
415  }
416  else {
417 #endif
418  for (i = 0; i < nrow; i ++) {
419  if (mark[i] != 1) {
420  t = bval[i];
421  begin_row = ia[i], end_row = ia[i+1];
422 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
423  d = aj[begin_row];
424  for (k = begin_row+1; k < end_row; k ++) {
425  j = ja[k];
426  t -= aj[k]*uval[j];
427  } // end for k
428 #else
429  for (k = begin_row; k < end_row; k ++) {
430  j = ja[k];
431  if (i!=j) t -= aj[k]*uval[j];
432  else d = aj[k];
433  } // end for k
434 #endif // end if DIAG_PREF
435  if (ABS(d) > SMALLREAL) uval[i] = t/d;
436  }
437  } // end for i
438 #ifdef _OPENMP
439  }
440 #endif
441 
442 #ifdef _OPENMP
443  if (nrow > OPENMP_HOLDS) {
444 #pragma omp parallel for private(myid,mybegin,myend,i,t,begin_row,end_row,k,j,d)
445  for (myid = 0; myid < nthreads; myid ++) {
446  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
447  for (i=mybegin; i<myend; i++) {
448  if (mark[i] == 1) {
449  t = bval[i];
450  begin_row = ia[i], end_row = ia[i+1];
451 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
452  d = aj[begin_row];
453  for (k = begin_row+1; k < end_row; k ++) {
454  j = ja[k];
455  t -= aj[k]*uval[j];
456  } // end for k
457 #else
458  for (k = begin_row; k < end_row; k ++) {
459  j = ja[k];
460  if (i!=j) t -= aj[k]*uval[j];
461  else d = aj[k];
462  } // end for k
463 #endif // end if DIAG_PREF
464  if (ABS(d) > SMALLREAL) uval[i] = t/d;
465  }
466  } // end for i
467  }
468  }
469  else {
470 #endif
471  for (i = 0; i < nrow; i ++) {
472  if (mark[i] == 1) {
473  t = bval[i];
474  begin_row = ia[i], end_row = ia[i+1];
475 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
476  d = aj[begin_row];
477  for (k = begin_row+1; k < end_row; k ++) {
478  j = ja[k];
479  t -= aj[k]*uval[j];
480  } // end for k
481 #else
482  for (k = begin_row; k < end_row; k ++) {
483  j = ja[k];
484  if (i!=j) t -= aj[k]*uval[j];
485  else d = aj[k];
486  } // end for k
487 #endif // end if DIAG_PREF
488  if (ABS(d) > SMALLREAL) uval[i] = t/d;
489  }
490  } // end for i
491 #ifdef _OPENMP
492  }
493 #endif
494  } // end while
495 
496  }
497 
498  // C-point first, F-point second
499  else {
500 
501  while (L--) {
502 #ifdef _OPENMP
503  if (nrow > OPENMP_HOLDS) {
504 #pragma omp parallel for private(myid,mybegin,myend,t,i,begin_row,end_row,k,j,d)
505  for (myid = 0; myid < nthreads; myid ++) {
506  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
507  for (i=mybegin; i<myend; i++) {
508  if (mark[i] == 1) {
509  t = bval[i];
510  begin_row = ia[i],end_row = ia[i+1];
511 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
512  d = aj[begin_row];
513  for (k = begin_row+1; k < end_row; k ++) {
514  j = ja[k];
515  t -= aj[k]*uval[j];
516  } // end for k
517 #else
518  for (k = begin_row; k < end_row; k ++) {
519  j = ja[k];
520  if (i!=j) t -= aj[k]*uval[j];
521  else d = aj[k];
522  } // end for k
523 #endif // end if DIAG_PREF
524  if (ABS(d) > SMALLREAL) uval[i] = t/d;
525  }
526  } // end for i
527  }
528  }
529  else {
530 #endif
531  for (i = 0; i < nrow; i ++) {
532  if (mark[i] == 1) {
533  t = bval[i];
534  begin_row = ia[i],end_row = ia[i+1];
535 #if DIAGONAL_PREF // Added by Chensong on 09/22/2012
536  d = aj[begin_row];
537  for (k = begin_row+1; k < end_row; k ++) {
538  j = ja[k];
539  t -= aj[k]*uval[j];
540  } // end for k
541 #else
542  for (k = begin_row; k < end_row; k ++) {
543  j = ja[k];
544  if (i!=j) t -= aj[k]*uval[j];
545  else d = aj[k];
546  } // end for k
547 #endif // end if DIAG_PREF
548  if (ABS(d) > SMALLREAL) uval[i] = t/d;
549  }
550  } // end for i
551 #ifdef _OPENMP
552  }
553 #endif
554 
555 #ifdef _OPENMP
556  if (nrow > OPENMP_HOLDS) {
557 #pragma omp parallel for private(myid, mybegin, myend, i,t,begin_row,end_row,k,j,d)
558  for (myid = 0; myid < nthreads; myid ++) {
559  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
560  for (i=mybegin; i<myend; i++) {
561  if (mark[i] != 1) {
562  t = bval[i];
563  begin_row = ia[i],end_row = ia[i+1];
564 #if DIAGONAL_PREF // Added by Chensong on 01/17/2013
565  d = aj[begin_row];
566  for (k = begin_row+1; k < end_row; k ++) {
567  j = ja[k];
568  t -= aj[k]*uval[j];
569  } // end for k
570 #else
571  for (k = begin_row; k < end_row; k ++) {
572  j = ja[k];
573  if (i!=j) t -= aj[k]*uval[j];
574  else d = aj[k];
575  } // end for k
576 #endif // end if DIAG_PREF
577  if (ABS(d) > SMALLREAL) uval[i] = t/d;
578  }
579  } // end for i
580  }
581  }
582  else {
583 #endif
584  for (i = 0; i < nrow; i ++) {
585  if (mark[i] != 1) {
586  t = bval[i];
587  begin_row = ia[i],end_row = ia[i+1];
588 #if DIAGONAL_PREF // Added by Chensong on 09/22/2012
589  d = aj[begin_row];
590  for (k = begin_row+1; k < end_row; k ++) {
591  j = ja[k];
592  t -= aj[k]*uval[j];
593  } // end for k
594 #else
595  for (k = begin_row; k < end_row; k ++) {
596  j = ja[k];
597  if (i!=j) t -= aj[k]*uval[j];
598  else d = aj[k];
599  } // end for k
600 #endif // end if DIAG_PREF
601  if (ABS(d) > SMALLREAL) uval[i] = t/d;
602  }
603  } // end for i
604 #ifdef _OPENMP
605  }
606 #endif
607  } // end while
608 
609  } // end if order
610 
611  return;
612 }
613 
630  dCSRmat *A,
631  dvector *b,
632  INT L)
633 {
634  const INT nm1=b->row-1;
635  const INT *ia=A->IA,*ja=A->JA;
636  const REAL *aj=A->val,*bval=b->val;
637  REAL *uval=u->val;
638 
639  // local variables
640  INT i,j,k,begin_row,end_row;
641  REAL t,d=0;
642 
643 #ifdef _OPENMP
644  // variables for OpenMP
645  INT myid, mybegin, myend, up;
646  INT nthreads = FASP_GET_NUM_THREADS();
647 #endif
648 
649  while (L--) {
650  // forward sweep
651 #ifdef _OPENMP
652  up = nm1 + 1;
653  if (up > OPENMP_HOLDS) {
654 #pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, j, k, d)
655  for (myid=0; myid<nthreads; myid++) {
656  FASP_GET_START_END(myid, nthreads, up, &mybegin, &myend);
657  for (i=mybegin; i<myend; i++) {
658  t=bval[i];
659  begin_row=ia[i], end_row=ia[i+1];
660  for (k=begin_row;k<end_row;++k) {
661  j=ja[k];
662  if (i!=j) t-=aj[k]*uval[j];
663  else d=aj[k];
664  } // end for k
665  if (ABS(d)>SMALLREAL) uval[i]=t/d;
666  } // end for i
667  }
668  }
669  else {
670 #endif
671  for (i=0;i<=nm1;++i) {
672  t=bval[i];
673  begin_row=ia[i], end_row=ia[i+1];
674  for (k=begin_row;k<end_row;++k) {
675  j=ja[k];
676  if (i!=j) t-=aj[k]*uval[j];
677  else d=aj[k];
678  } // end for k
679  if (ABS(d)>SMALLREAL) uval[i]=t/d;
680  } // end for i
681 #ifdef _OPENMP
682  }
683 #endif
684 
685  // backward sweep
686 #ifdef _OPENMP
687  up = nm1;
688  if (up > OPENMP_HOLDS) {
689 #pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
690  for (myid=0; myid<nthreads; myid++) {
691  FASP_GET_START_END(myid, nthreads, up, &mybegin, &myend);
692  mybegin = nm1-1-mybegin; myend = nm1-1-myend;
693  for (i=mybegin; i>myend; i--) {
694  t=bval[i];
695  begin_row=ia[i], end_row=ia[i+1];
696  for (k=begin_row; k<end_row; k++) {
697  j=ja[k];
698  if (i!=j) t-=aj[k]*uval[j];
699  else d=aj[k];
700  } // end for k
701  if (ABS(d)>SMALLREAL) uval[i]=t/d;
702  } // end for i
703  }
704  }
705  else {
706 #endif
707  for (i=nm1-1;i>=0;--i) {
708  t=bval[i];
709  begin_row=ia[i], end_row=ia[i+1];
710  for (k=begin_row;k<end_row;++k) {
711  j=ja[k];
712  if (i!=j) t-=aj[k]*uval[j];
713  else d=aj[k];
714  } // end for k
715  if (ABS(d)>SMALLREAL) uval[i]=t/d;
716  } // end for i
717 #ifdef _OPENMP
718  }
719 #endif
720  } // end while
721 
722  return;
723 }
724 
746  const INT i_1,
747  const INT i_n,
748  const INT s,
749  dCSRmat *A,
750  dvector *b,
751  INT L,
752  const REAL w)
753 {
754  const INT *ia=A->IA,*ja=A->JA;
755  const REAL *aj=A->val,*bval=b->val;
756  REAL *uval=u->val;
757 
758  // local variables
759  INT i,j,k,begin_row,end_row;
760  REAL t, d=0;
761 
762 #ifdef _OPENMP
763  // variables for OpenMP
764  const INT N = ABS(i_n - i_1)+1;
765  INT myid, mybegin, myend;
766  INT nthreads = FASP_GET_NUM_THREADS();
767 #endif
768 
769  while (L--) {
770  if (s>0) {
771 #ifdef _OPENMP
772  if (N > OPENMP_HOLDS) {
773 #pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
774  for (myid=0; myid<nthreads; myid++) {
775  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
776  mybegin += i_1, myend += i_1;
777  for (i=mybegin; i<myend; i+=s) {
778  t=bval[i];
779  begin_row=ia[i], end_row=ia[i+1];
780  for (k=begin_row; k<end_row; k++) {
781  j=ja[k];
782  if (i!=j)
783  t-=aj[k]*uval[j];
784  else
785  d=aj[k];
786  }
787  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
788  }
789  }
790 
791  }
792  else {
793 #endif
794  for (i=i_1; i<=i_n; i+=s) {
795  t=bval[i];
796  begin_row=ia[i], end_row=ia[i+1];
797  for (k=begin_row; k<end_row; ++k) {
798  j=ja[k];
799  if (i!=j)
800  t-=aj[k]*uval[j];
801  else
802  d=aj[k];
803  }
804  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
805  }
806 #ifdef _OPENMP
807  }
808 #endif
809  }
810  else {
811 #ifdef _OPENMP
812  if (N > OPENMP_HOLDS) {
813 #pragma omp parallel for private(myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
814  for (myid=0; myid<nthreads; myid++) {
815  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
816  mybegin = i_1 - mybegin, myend = i_1 - myend;
817  for (i=mybegin; i>myend; i+=s) {
818  t=bval[i];
819  begin_row=ia[i],end_row=ia[i+1];
820  for (k=begin_row;k<end_row;++k) {
821  j=ja[k];
822  if (i!=j)
823  t-=aj[k]*uval[j];
824  else
825  d=aj[k];
826  }
827  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
828  }
829  }
830  }
831  else {
832 #endif
833  for (i=i_1;i>=i_n;i+=s) {
834  t=bval[i];
835  begin_row=ia[i],end_row=ia[i+1];
836  for (k=begin_row;k<end_row;++k) {
837  j=ja[k];
838  if (i!=j)
839  t-=aj[k]*uval[j];
840  else
841  d=aj[k];
842  }
843  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
844  }
845 #ifdef _OPENMP
846  }
847 #endif
848  }
849  } // end while
850 
851  return;
852 }
853 
874  dCSRmat *A,
875  dvector *b,
876  INT L,
877  const REAL w,
878  INT *mark,
879  const INT order )
880 {
881  const INT nrow = b->row; // number of rows
882  const INT *ia = A->IA, *ja=A->JA;
883  const REAL *aj = A->val,*bval=b->val;
884  REAL *uval = u->val;
885 
886  // local variables
887  INT i,j,k,begin_row,end_row;
888  REAL t,d=0.0;
889 
890 #ifdef _OPENMP
891  INT myid, mybegin, myend;
892  INT nthreads = FASP_GET_NUM_THREADS();
893 #endif
894 
895  // F-point first
896  if (order == -1) {
897  while (L--) {
898 #ifdef _OPENMP
899  if (nrow > OPENMP_HOLDS) {
900 #pragma omp parallel for private (myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
901  for (myid = 0; myid < nthreads; myid++) {
902  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
903  for (i = mybegin; i < myend; i ++) {
904  if (mark[i] == 0 || mark[i] == 2) {
905  t = bval[i];
906  begin_row = ia[i], end_row = ia[i+1];
907  for (k = begin_row; k < end_row; k ++) {
908  j = ja[k];
909  if (i!=j) t -= aj[k]*uval[j];
910  else d = aj[k];
911  } // end for k
912  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
913  }
914  }
915  }
916  } // end for i
917  else {
918 #endif
919  for (i = 0; i < nrow; i ++) {
920  if (mark[i] == 0 || mark[i] == 2) {
921  t = bval[i];
922  begin_row = ia[i], end_row = ia[i+1];
923  for (k = begin_row; k < end_row; k ++) {
924  j = ja[k];
925  if (i!=j) t -= aj[k]*uval[j];
926  else d = aj[k];
927  } // end for k
928  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
929  }
930  } // end for i
931 #ifdef _OPENMP
932  }
933 #endif
934 
935 #ifdef _OPENMP
936  if (nrow > OPENMP_HOLDS) {
937 #pragma omp parallel for private(myid, i, mybegin, myend, t, begin_row, end_row, k, j, d)
938  for (myid = 0; myid < nthreads; myid++) {
939  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
940  for (i = mybegin; i < myend; i++) {
941  if (mark[i] == 1) {
942  t = bval[i];
943  begin_row = ia[i], end_row = ia[i+1];
944  for (k = begin_row; k < end_row; k ++) {
945  j = ja[k];
946  if (i!=j) t -= aj[k]*uval[j];
947  else d = aj[k];
948  } // end for k
949  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
950  }
951  } // end for i
952  }
953  }
954  else {
955 #endif
956  for (i = 0; i < nrow; i ++) {
957  if (mark[i] == 1) {
958  t = bval[i];
959  begin_row = ia[i], end_row = ia[i+1];
960  for (k = begin_row; k < end_row; k ++) {
961  j = ja[k];
962  if (i!=j) t -= aj[k]*uval[j];
963  else d = aj[k];
964  } // end for k
965  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
966  }
967  } // end for i
968 #ifdef _OPENMP
969  }
970 #endif
971  } // end while
972  }
973  else {
974  while (L--) {
975 #ifdef _OPENMP
976  if (nrow > OPENMP_HOLDS) {
977 #pragma omp parallel for private(myid, mybegin, myend, i, t, k, j, d, begin_row, end_row)
978  for (myid = 0; myid < nthreads; myid++) {
979  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
980  for (i = mybegin; i < myend; i++) {
981  if (mark[i] == 1) {
982  t = bval[i];
983  begin_row = ia[i], end_row = ia[i+1];
984  for (k = begin_row; k < end_row; k ++) {
985  j = ja[k];
986  if (i!=j) t -= aj[k]*uval[j];
987  else d = aj[k];
988  } // end for k
989  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
990  }
991  } // end for i
992  }
993  }
994  else {
995 #endif
996  for (i = 0; i < nrow; i ++) {
997  if (mark[i] == 1) {
998  t = bval[i];
999  begin_row = ia[i], end_row = ia[i+1];
1000  for (k = begin_row; k < end_row; k ++) {
1001  j = ja[k];
1002  if (i!=j) t -= aj[k]*uval[j];
1003  else d = aj[k];
1004  } // end for k
1005  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
1006  }
1007  } // end for i
1008 #ifdef _OPENMP
1009  }
1010 #endif
1011 
1012 #ifdef _OPENMP
1013  if (nrow > OPENMP_HOLDS) {
1014 #pragma omp parallel for private (myid, mybegin, myend, i, t, begin_row, end_row, k, j, d)
1015  for (myid = 0; myid < nthreads; myid++) {
1016  FASP_GET_START_END(myid, nthreads, nrow, &mybegin, &myend);
1017  for (i = mybegin; i < myend; i++) {
1018  if (mark[i] != 1) {
1019  t = bval[i];
1020  begin_row = ia[i], end_row = ia[i+1];
1021  for (k = begin_row; k < end_row; k ++) {
1022  j = ja[k];
1023  if (i!=j) t -= aj[k]*uval[j];
1024  else d = aj[k];
1025  } // end for k
1026  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
1027  }
1028  }
1029  }
1030  } // end for i
1031  else {
1032 #endif
1033  for (i = 0; i < nrow; i ++) {
1034  if (mark[i] != 1) {
1035  t = bval[i];
1036  begin_row = ia[i], end_row = ia[i+1];
1037  for (k = begin_row; k < end_row; k ++) {
1038  j = ja[k];
1039  if (i!=j) t -= aj[k]*uval[j];
1040  else d = aj[k];
1041  } // end for k
1042  if (ABS(d)>SMALLREAL) uval[i]=w*(t/d)+(1-w)*uval[i];
1043  }
1044  } // end for i
1045 #ifdef _OPENMP
1046  }
1047 #endif
1048  } // end while
1049  }
1050 
1051  return;
1052 }
1053 
1068  dvector *b,
1069  dvector *x,
1070  void *data)
1071 {
1072  const INT m=A->row, m2=2*m, memneed=3*m;
1073  const ILU_data *iludata=(ILU_data *)data;
1074 
1075  REAL *zz = iludata->work;
1076  REAL *zr = iludata->work+m;
1077  REAL *z = iludata->work+m2;
1078 
1079  if (iludata->nwork<memneed) goto MEMERR;
1080 
1081  {
1082  INT i, j, jj, begin_row, end_row;
1083  REAL *lu = iludata->luval;
1084  INT *ijlu = iludata->ijlu;
1085  REAL *xval = x->val, *bval = b->val;
1086 
1088  fasp_array_cp(m,bval,zr); fasp_blas_dcsr_aAxpy(-1.0,A,xval,zr);
1089 
1090  // forward sweep: solve unit lower matrix equation L*zz=zr
1091  zz[0]=zr[0];
1092  for (i=1;i<m;++i) {
1093  begin_row=ijlu[i]; end_row=ijlu[i+1];
1094  for (j=begin_row;j<end_row;++j) {
1095  jj=ijlu[j];
1096  if (jj<i) zr[i]-=lu[j]*zz[jj];
1097  else break;
1098  }
1099  zz[i]=zr[i];
1100  }
1101 
1102  // backward sweep: solve upper matrix equation U*z=zz
1103  z[m-1]=zz[m-1]*lu[m-1];
1104  for (i=m-2;i>=0;--i) {
1105  begin_row=ijlu[i]; end_row=ijlu[i+1]-1;
1106  for (j=end_row;j>=begin_row;--j) {
1107  jj=ijlu[j];
1108  if (jj>i) zz[i]-=lu[j]*z[jj];
1109  else break;
1110  }
1111  z[i]=zz[i]*lu[i];
1112  }
1113 
1114  fasp_blas_array_axpy(m,1,z,xval);
1115  }
1116 
1117  return;
1118 
1119 MEMERR:
1120  printf("### ERROR: ILU needs %d memory, only %d available! %s : %d\n",
1121  memneed, iludata->nwork, __FILE__, __LINE__);
1122  fasp_chkerr(ERROR_ALLOC_MEM, __FUNCTION__);
1123 }
1124 
1146  const INT i_1,
1147  const INT i_n,
1148  const INT s,
1149  dCSRmat *A,
1150  dvector *b,
1151  INT L,
1152  const REAL w)
1153 {
1154  const INT *ia=A->IA,*ja=A->JA;
1155  const REAL *aj=A->val,*bval=b->val;
1156  REAL *uval=u->val;
1157 
1158  // local variables
1159  INT i,j,k,begin_row,end_row;
1160  REAL temp1,temp2,alpha;
1161 
1162 #ifdef _OPENMP
1163  // variables for OpenMP
1164  const INT N = ABS(i_n - i_1)+1;
1165  INT myid, mybegin, myend;
1166  INT nthreads = FASP_GET_NUM_THREADS();
1167 #endif
1168 
1169  if (s > 0) {
1170 
1171  while (L--) {
1172 #ifdef _OPENMP
1173  if (N > OPENMP_HOLDS) {
1174 #pragma omp parallel for private(myid, mybegin, myend, i, temp1, temp2, begin_row, end_row, k, alpha, j)
1175  for (myid=0; myid<nthreads; myid++) {
1176  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
1177  mybegin += i_1, myend += i_1;
1178  for (i=mybegin; i<myend; i+=s) {
1179  temp1 = 0; temp2 = 0;
1180  begin_row=ia[i], end_row=ia[i+1];
1181  for (k=begin_row; k<end_row; k++) {
1182  j=ja[k];
1183  temp1 += aj[k]*aj[k];
1184  temp2 += aj[k]*uval[j];
1185  } // end for k
1186  }
1187  alpha = (bval[i] - temp2)/temp1;
1188  for (k=begin_row; k<end_row; ++k){
1189  j = ja[k];
1190  uval[j] += w*alpha*aj[k];
1191  }// end for k
1192  } // end for i
1193  }
1194  else {
1195 #endif
1196  for (i=i_1;i<=i_n;i+=s) {
1197  temp1 = 0; temp2 = 0;
1198  begin_row=ia[i], end_row=ia[i+1];
1199  for (k=begin_row;k<end_row;++k) {
1200  j=ja[k];
1201  temp1 += aj[k]*aj[k];
1202  temp2 += aj[k]*uval[j];
1203  } // end for k
1204  alpha = (bval[i] - temp2)/temp1;
1205  for (k=begin_row;k<end_row;++k){
1206  j = ja[k];
1207  uval[j] += w*alpha*aj[k];
1208  }// end for k
1209  } // end for i
1210 #ifdef _OPENMP
1211  }
1212 #endif
1213  } // end while
1214 
1215  } // if s
1216 
1217  else {
1218  while (L--) {
1219 #ifdef _OPENMP
1220  if (N > OPENMP_HOLDS) {
1221 #pragma omp parallel for private(myid, mybegin, myend, i, temp1, temp2, begin_row, end_row, k, alpha, j)
1222  for (myid=0; myid<nthreads; myid++) {
1223  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
1224  mybegin = i_1 - mybegin, myend = i_1 - myend;
1225  for (i=mybegin; i>myend; i+=s) {
1226  temp1 = 0; temp2 = 0;
1227  begin_row=ia[i], end_row=ia[i+1];
1228  for (k=begin_row;k<end_row;++k) {
1229  j=ja[k];
1230  temp1 += aj[k]*aj[k];
1231  temp2 += aj[k]*uval[j];
1232  } // end for k
1233  alpha = (bval[i] - temp2)/temp1;
1234  for (k=begin_row;k<end_row;++k){
1235  j = ja[k];
1236  uval[j] += w*alpha*aj[k];
1237  }// end for k
1238  } // end for i
1239  }
1240  }
1241  else {
1242 #endif
1243  for (i=i_1;i>=i_n;i+=s) {
1244  temp1 = 0; temp2 = 0;
1245  begin_row=ia[i], end_row=ia[i+1];
1246  for (k=begin_row;k<end_row;++k) {
1247  j=ja[k];
1248  temp1 += aj[k]*aj[k];
1249  temp2 += aj[k]*uval[j];
1250  } // end for k
1251  alpha = (bval[i] - temp2)/temp1;
1252  for (k=begin_row;k<end_row;++k){
1253  j = ja[k];
1254  uval[j] += w*alpha*aj[k];
1255  }// end for k
1256  } // end for i
1257 #ifdef _OPENMP
1258  }
1259 #endif
1260  } // end while
1261 
1262  } // end if
1263 
1264  return;
1265 }
1266 
1287  const INT i_1,
1288  const INT i_n,
1289  const INT s,
1290  dCSRmat *A,
1291  dvector *b,
1292  INT L)
1293 {
1294  const INT N = ABS(i_n - i_1)+1;
1295  const INT *ia=A->IA, *ja=A->JA;
1296  const REAL *aj=A->val,*bval=b->val;
1297  REAL *uval=u->val;
1298 
1299  // local variables
1300  INT i,j,k,begin_row,end_row;
1301 
1302 #ifdef _OPENMP
1303  // variables for OpenMP
1304  INT myid, mybegin, myend;
1305  INT nthreads = FASP_GET_NUM_THREADS();
1306 #endif
1307 
1308  // Checks should be outside of for; t,d can be allocated before calling!!! --Chensong
1309  REAL *t = (REAL *)fasp_mem_calloc(N,sizeof(REAL));
1310  REAL *d = (REAL *)fasp_mem_calloc(N,sizeof(REAL));
1311 
1312  while (L--) {
1313  if (s>0) {
1314 #ifdef _OPENMP
1315  if (N > OPENMP_HOLDS) {
1316 #pragma omp parallel for private(myid, mybegin, myend, i, begin_row, end_row, k, j)
1317  for (myid=0; myid<nthreads; myid++) {
1318  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
1319  mybegin += i_1, myend += i_1;
1320  for (i=mybegin; i<myend; i+=s) {
1321  t[i]=bval[i]; d[i]=0.0;
1322  begin_row=ia[i], end_row=ia[i+1];
1323  for (k=begin_row; k<end_row; k++) {
1324  j=ja[k];
1325  t[i]-=aj[k]*uval[j];
1326  d[i]+=ABS(aj[k]);
1327  }
1328  }
1329  }
1330 #pragma omp parallel for private(i)
1331  for (i=i_1;i<=i_n;i+=s) {
1332  if (ABS(d[i])>SMALLREAL) u->val[i]+=t[i]/d[i];
1333  }
1334  }
1335  else {
1336 #endif
1337  for (i=i_1;i<=i_n;i+=s) {
1338  t[i]=bval[i]; d[i]=0.0;
1339  begin_row=ia[i],end_row=ia[i+1];
1340  for (k=begin_row;k<end_row;++k) {
1341  j=ja[k];
1342  t[i]-=aj[k]*uval[j];
1343  d[i]+=ABS(aj[k]);
1344  }
1345  }
1346 
1347  for (i=i_1;i<=i_n;i+=s) {
1348  if (ABS(d[i])>SMALLREAL) u->val[i]+=t[i]/d[i];
1349  }
1350 #ifdef _OPENMP
1351  }
1352 #endif
1353  }
1354  else {
1355 #ifdef _OPENMP
1356  if (N > OPENMP_HOLDS) {
1357 #pragma omp parallel for private(myid, mybegin, myend, i, k, j, begin_row, end_row)
1358  for (myid=0; myid<nthreads; myid++) {
1359  FASP_GET_START_END(myid, nthreads, N, &mybegin, &myend);
1360  mybegin = i_1 - mybegin, myend = i_1 - myend;
1361  for (i=mybegin; i>myend; i+=s) {
1362  t[i]=bval[i]; d[i]=0.0;
1363  begin_row=ia[i], end_row=ia[i+1];
1364  for (k=begin_row; k<end_row; k++) {
1365  j=ja[k];
1366  t[i]-=aj[k]*uval[j];
1367  d[i]+=ABS(aj[k]);
1368  }
1369  }
1370  }
1371 #pragma omp parallel for private(i)
1372  for (i=i_1;i>=i_n;i+=s) {
1373  if (ABS(d[i])>SMALLREAL) u->val[i]+=t[i]/d[i];
1374  }
1375  }
1376  else {
1377 #endif
1378  for (i=i_1;i>=i_n;i+=s) {
1379  t[i]=bval[i];d[i]=0.0;
1380  begin_row=ia[i],end_row=ia[i+1];
1381  for (k=begin_row;k<end_row;++k) {
1382  j=ja[k];
1383  t[i]-=aj[k]*uval[j];
1384  d[i]+=ABS(aj[k]);
1385  }
1386  }
1387 
1388  for (i=i_1;i>=i_n;i+=s) {
1389  if (ABS(d[i])>SMALLREAL) u->val[i]+=t[i]/d[i];
1390  }
1391 #ifdef _OPENMP
1392  }
1393 #endif
1394  }
1395 
1396  } // end while
1397 
1398  fasp_mem_free(t);
1399  fasp_mem_free(d);
1400 
1401  return;
1402 }
1403 
1426  dCSRmat *A,
1427  dvector *b,
1428  INT L,
1429  const INT order,
1430  INT *mark,
1431  const INT maximap,
1432  const INT nx,
1433  const INT ny,
1434  const INT nz)
1435 {
1436  const INT nrow = b->row; // number of rows
1437  INT *ia=A->IA,*ja=A->JA;
1438  REAL *aa=A->val, *bval=b->val;
1439  REAL *uval=u->val;
1440 
1441  INT i,j,k,begin_row,end_row;
1442  REAL t,d=0.0;
1443 
1444  // forward
1445  if (order == 1) {
1446  while (L--) {
1447  rb0f3d( ia, ja, aa, uval, bval, mark, nx, ny, nz, 1);
1448 #if 1
1449  // for (ii =0;ii <10;ii++)
1450  for (i = maximap; i < nrow; i ++) {
1451  t = bval[i];
1452  begin_row = ia[i], end_row = ia[i+1];
1453  for (k = begin_row; k < end_row; k ++) {
1454  j = ja[k];
1455  if (i!=j) t -= aa[k]*uval[j];
1456  else d = aa[k];
1457  } // end for k
1458  if (ABS(d) > SMALLREAL) uval[i] = t/d;
1459  } // end for i
1460 #endif
1461  } // end while
1462  }
1463 
1464  // backward
1465  else {
1466  while (L--) {
1467 #if 1
1468  // for (ii =0;ii <10;ii++)
1469  for (i = nrow-1; i >= maximap; i --) {
1470  t = bval[i];
1471  begin_row = ia[i],end_row = ia[i+1];
1472  for (k = begin_row; k < end_row; k ++) {
1473  j = ja[k];
1474  if (i!=j) t -= aa[k]*uval[j];
1475  else d = aa[k];
1476  } // end for k
1477  if (ABS(d) > SMALLREAL) uval[i] = t/d;
1478  } // end for i
1479 #endif
1480  rb0b3d( ia, ja, aa, uval, bval, mark, nx, ny, nz, 1);
1481  } // end while
1482  }
1483  return;
1484 }
1485 
1486 /*---------------------------------*/
1487 /*-- Private Functions --*/
1488 /*---------------------------------*/
1489 
1513 static void swep2db (INT *ia,
1514  INT *ja,
1515  REAL *aa,
1516  REAL *u,
1517  REAL *f,
1518  INT nbegx,
1519  INT nbegy,
1520  INT *mark,
1521  INT nx,
1522  INT ny)
1523 {
1524  INT j, j0, i, i0;
1525  INT begin_row, end_row, ii, jj;
1526  REAL t, d;
1527 
1528  nbegx = nx + nbegx;
1529  nbegy = ny + nbegy;
1530 
1531 #ifdef _OPENMP
1532 #pragma omp parallel for private(j,j0,i,i0,t,begin_row,end_row,ii,jj,d)
1533 #endif
1534  for (j = nbegy; j >=0; j-=2) {
1535  j0= j*nx;
1536  for (i = nbegx; i >=0; i-=2) {
1537  i0 = i + j0;
1538  i0 = mark[i0]-1; // Fortran to C
1539  if (i0>=0 ) {
1540  t = f[i0];
1541  begin_row = ia[i0], end_row = ia[i0+1];
1542  for (ii = begin_row; ii < end_row; ii ++) {
1543  jj = ja[ii];
1544  if (i0!=jj) t -= aa[ii]*u[jj];
1545  else d = aa[ii];
1546  } // end for ii
1547  if (ABS(d) > SMALLREAL) u[i0] = t/d;
1548  } //if (i0>=0 )
1549  }
1550  }
1551 }
1552 
1553 
1579 static void swep3db (INT *ia,
1580  INT *ja,
1581  REAL *aa,
1582  REAL *u,
1583  REAL *f,
1584  INT nbegx,
1585  INT nbegy,
1586  INT nbegz,
1587  INT *mark,
1588  INT nx,
1589  INT ny,
1590  INT nz)
1591 {
1592  INT nxy, k, k0, j, j0, i, i0;
1593  INT begin_row, end_row, ii, jj;
1594  REAL t, d;
1595 
1596  nxy=nx*ny;
1597  nbegx = nx + nbegx;
1598  nbegy = ny + nbegy;
1599  nbegz = nz + nbegz;
1600 
1601 #ifdef _OPENMP
1602 #pragma omp parallel for private(k,k0,j,j0,i,i0,t,begin_row,end_row,ii,jj,d)
1603 #endif
1604  for (k=nbegz; k >=0; k-=2) {
1605  k0= k*nxy;
1606  for (j = nbegy; j >=0; j-=2) {
1607  j0= j*nx;
1608  for (i = nbegx; i >=0; i-=2) {
1609  i0 = i + j0 + k0;
1610  i0 = mark[i0]-1; // Fortran to C
1611  if (i0>=0 ) {
1612  t = f[i0];
1613  begin_row = ia[i0], end_row = ia[i0+1];
1614  for (ii = begin_row; ii < end_row; ii ++) {
1615  jj = ja[ii];
1616  if (i0!=jj) t -= aa[ii]*u[jj];
1617  else d = aa[ii];
1618  } // end for ii
1619 
1620  if (ABS(d) > SMALLREAL) u[i0] = t/d;
1621  } //if (i0>=0 )
1622  }
1623  }
1624  }
1625 }
1626 
1627 /*
1628  * \fn static void rb0b2d (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f,
1629  * INT *mark, INT nx, INT ny, INT nsweeps)
1630  *
1631  * \brief Colored Gauss-Seidel backward smoother for Au=b
1632  *
1633  * \param ia Pointer to start location of each row
1634  * \param ja Pointer to column index of nonzero elements
1635  * \param aa Pointer to nonzero elements of
1636  * \param u Pointer to initial guess
1637  * \param f Pointer to right hand
1638  * \param mark Pointer to order of nodes
1639  * \param nx Number of nodes in x direction
1640  * \param ny Number of nodes in y direction
1641  * \param nsweeps Number of relaxation sweeps
1642  *
1643  * \author Chunsheng Feng, Zheng Li
1644  * \date 02/06/2012
1645  *
1646  * \note This subroutine is based on SiPSMG (Simple Poisson Solver based on MultiGrid)
1647  * (c) 2008 Johannes Kraus, Jinchao Xu, Yunrong Zhu, Ludmil Zikatanov
1648  */
1649 static void rb0b2d (INT *ia,
1650  INT *ja,
1651  REAL *aa,
1652  REAL *u,
1653  REAL *f,
1654  INT *mark,
1655  INT nx,
1656  INT ny,
1657  INT nsweeps)
1658 {
1659 #if 1
1660  INT n0e,n0o,isweep;
1661 
1662  n0e=0;
1663  n0o=1;
1664 
1665  for (isweep = 1; isweep <= nsweeps; isweep++) {
1666  /*... e-e */
1667  swep2df(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
1668  /*... e-o */
1669  swep2df(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
1670  /*... o-e */
1671  swep2df(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
1672  /*... o-o */
1673  swep2df(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
1674  }
1675 #else
1676  INT n0e = -1, n0o = -2, isweep;
1677  //INT ex, ey, ez;
1678  //INT ox, oy, oz;
1679 
1680  for (isweep = 1; isweep <= nsweeps; isweep++) {
1681  if ((nx%2==0) &&(ny%2 ==0)) {
1682  /*... e-e (and going backwards) */
1683  swep2db(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
1684  /*... e-o */
1685  swep2db(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
1686  /*... o-e */
1687  swep2db(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
1688  /*... o-o */
1689  swep2db(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
1690  }
1691  else if ((nx%2==0)&&(ny%2 ==1)) {
1692  /*... e-o (and going backwards) */
1693  swep2db(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
1694  /*... e-e */
1695  swep2db(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
1696  /*... o-o */
1697  swep2db(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
1698  /*... o-e */
1699  swep2db(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
1700  }
1701  else if ((nx%2==1)&&(ny%2 ==0)) {
1702  /*... o-e (and going backwards) */
1703  swep2db(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
1704  /*... o-o */
1705  swep2db(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
1706  /*... e-e */
1707  swep2db(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
1708  /*... e-o */
1709  swep2db(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
1710  }
1711  else if ((nx%2==1)&&(ny%2 ==1)) {
1712  /*... o-o (and going backwards) */
1713  swep2db(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
1714  /*... o-e */
1715  swep2db(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
1716  /*... e-o */
1717  swep2db(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
1718  /*... e-e */
1719  swep2db(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
1720  }
1721  }
1722 #endif
1723 }
1724 
1725 /*
1726  * \fn static void rb0b3d (INT *ia, INT *ja, REAL *aa,REAL *u, REAL *f,
1727  * INT *mark, INT nx, INT ny, INT nz, INT nsweeps)
1728  *
1729  * \brief Colores Gauss-Seidel backward smoother for Au=b
1730  *
1731  * \param ia Pointer to start location of each row
1732  * \param ja Pointer to column index of nonzero elements
1733  * \param aa Pointer to nonzero elements of
1734  * \param u Pointer to initial guess
1735  * \param f Pointer to right hand
1736  * \param mark Pointer to order of nodes
1737  * \param nx Number of nodes in x direction
1738  * \param ny Number of nodes in y direction
1739  * \param nz Number of nodes in z direction
1740  * \param nsweeps Number of relaxation sweeps
1741  *
1742  * \author Chunsheng Feng
1743  * \date 02/06/2012
1744  *
1745  * \note This subroutine is based on SiPSMG (Simple Poisson Solver based on MultiGrid)
1746  * (c) 2008 Johannes Kraus, Jinchao Xu, Yunrong Zhu, Ludmil Zikatanov
1747  */
1748 static void rb0b3d (INT *ia,
1749  INT *ja,
1750  REAL *aa,
1751  REAL *u,
1752  REAL *f,
1753  INT *mark,
1754  INT nx,
1755  INT ny,
1756  INT nz,
1757  INT nsweeps)
1758 {
1759 #if 1
1760  INT n0e,n0o,isweep;
1761 
1762  n0e=0;
1763  n0o=1;
1764 
1765  for (isweep = 1; isweep <= nsweeps; isweep++) {
1766  /*... e-e-e */
1767  swep3df(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1768  /*... e-e-o */
1769  swep3df(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1770  /*... e-o-e */
1771  swep3df(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1772  /*... e-o-o */
1773  swep3df(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1774  /*... o-e-e */
1775  swep3df(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1776  /*... o-e-o */
1777  swep3df(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1778  /*... o-o-e */
1779  swep3df(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1780  /*... o-o-o */
1781  swep3df(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1782  }
1783 #else
1784  INT n0e = -1, n0o = -2, isweep;
1785  //INT ex, ey, ez;
1786  //INT ox, oy, oz;
1787 
1788  for (isweep = 1; isweep <= nsweeps; isweep++) {
1789  if ((nx%2==0) &&(ny%2 ==0) &&(nz%2==0)) {
1790  /*... e-e-e (and going backwards) */
1791  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1792  /*... e-e-o */
1793  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1794  /*... e-o-e */
1795  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1796  /*... e-o-o */
1797  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1798  /*... o-e-e */
1799  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1800  /*... o-e-o */
1801  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1802  /*... o-o-e */
1803  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1804  /*... o-o-o */
1805  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1806  /*... e-e-e (and going backwards) */
1807  //swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1808  }
1809  else if ((nx%2==0) &&(ny%2 ==0) &&(nz%2==1)) {
1810  /*... e-e-o (and going backwards) */
1811  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1812  /*... e-e-e */
1813  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1814  /*... e-o-o */
1815  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1816  /*... e-o-e */
1817  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1818  /*... o-e-o */
1819  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1820  /*... o-e-e */
1821  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1822  /*... o-o-o */
1823  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1824  /*... o-o-e */
1825  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1826  /*... e-e-o (and going backwards) */
1827  //swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1828  }
1829  else if ((nx%2==0)&&(ny%2 ==1)&&(nz%2==0)) {
1830  /*... e-o-e (and going backwards) */
1831  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1832  /*... e-o-o */
1833  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1834  /*... e-e-e */
1835  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1836  /*... e-e-o */
1837  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1838  /*... o-o-e */
1839  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1840  /*... o-o-o */
1841  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1842  /*... o-e-e */
1843  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1844  /*... o-e-o */
1845  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1846  /*... e-o-e (and going backwards) */
1847  // swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1848  }
1849  else if ((nx%2==0)&&(ny%2 ==1)&&(nz%2==1)) {
1850  /*... e-o-o (and going backwards) */
1851  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1852  /*... e-o-e */
1853  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1854  /*... e-e-o */
1855  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1856  /*... e-e-e */
1857  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1858  /*... o-o-o */
1859  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1860  /*... o-o-e */
1861  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1862  /*... o-e-o */
1863  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1864  /*... o-e-e */
1865  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1866  /*... e-o-o (and going backwards) */
1867  // swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1868  }
1869  else if ((nx%2==1)&&(ny%2 ==0)&&(nz%2==0)) {
1870  /*... o-e-e (and going backwards) */
1871  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1872  /*... o-e-o */
1873  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1874  /*... o-o-e */
1875  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1876  /*... o-o-o */
1877  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1878  /*... e-e-e */
1879  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1880  /*... e-e-o */
1881  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1882  /*... e-o-e */
1883  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1884  /*... e-o-o */
1885  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1886  /*... o-e-e (and going backwards) */
1887  // swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1888  }
1889  else if ((nx%2==1)&&(ny%2 ==0)&&(nz%2==1)) {
1890  /*... o-e-o (and going backwards) */
1891  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1892  /*... o-e-e */
1893  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1894  /*... o-o-o */
1895  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1896  /*... o-o-e */
1897  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1898  /*... e-e-o */
1899  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1900  /*... e-e-e */
1901  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1902  /*... e-o-o */
1903  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1904  /*... e-o-e */
1905  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1906  /*... o-e-o (and going backwards) */
1907  // swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1908  }
1909  else if ((nx%2==1)&&(ny%2 ==1)&&(nz%2==0)) {
1910  /*... o-o-e (and going backwards) */
1911  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1912  /*... o-o-o */
1913  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1914  /*... o-e-e */
1915  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1916  /*... o-e-o */
1917  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1918  /*... e-o-e */
1919  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1920  /*... e-o-o */
1921  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1922  /*... e-e-e */
1923  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1924  /*... e-e-o */
1925  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1926  /*... o-o-e (and going backwards) */
1927  //swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1928  }
1929  else if ((nx%2==1)&&(ny%2 ==1)&&(nz%2==1)) {
1930  /*... o-o-o (and going backwards) */
1931  swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1932  /*... o-o-e */
1933  swep3db(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
1934  /*... o-e-o */
1935  swep3db(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
1936  /*... o-e-e */
1937  swep3db(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
1938  /*... e-o-o */
1939  swep3db(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
1940  /*... e-o-e */
1941  swep3db(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
1942  /*... e-e-o */
1943  swep3db(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
1944  /*... e-e-e */
1945  swep3db(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
1946  /*... o-o-o (and going backwards) */
1947  //swep3db(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
1948 
1949  }
1950  }
1951 #endif
1952 }
1953 
1977 static void swep2df (INT *ia,
1978  INT *ja,
1979  REAL *aa,
1980  REAL *u,
1981  REAL *f,
1982  INT nbegx,
1983  INT nbegy,
1984  INT *mark,
1985  INT nx,
1986  INT ny )
1987 {
1988  INT j,j0,i,i0;
1989  INT begin_row,end_row,ii,jj;
1990  REAL t,d=0;
1991 
1992 #ifdef _OPENMP
1993 #pragma omp parallel for private(j,j0,i,i0,t,begin_row,end_row,ii,jj,d)
1994 #endif
1995  for (j = nbegy; j < ny; j+=2) {
1996  j0= j*nx;
1997  for (i = nbegx; i < nx; i+=2)
1998  {
1999  i0 = i + j0;
2000  i0 = mark[i0]-1; //Fortran to C
2001  if (i0>=0 ) {
2002  t = f[i0];
2003  begin_row = ia[i0], end_row = ia[i0+1];
2004  for (ii = begin_row; ii < end_row; ii ++) {
2005  jj = ja[ii];
2006  if (i0!=jj) t -= aa[ii]*u[jj];
2007  else d = aa[ii];
2008  } // end for ii
2009  if (ABS(d) > SMALLREAL) u[i0] = t/d;
2010  } // if (i0>=0 )
2011  }
2012  }
2013 
2014 }
2015 
2016 
2042 static void swep3df (INT *ia,
2043  INT *ja,
2044  REAL *aa,
2045  REAL *u,
2046  REAL *f,
2047  INT nbegx,
2048  INT nbegy,
2049  INT nbegz,
2050  INT *mark,
2051  INT nx,
2052  INT ny,
2053  INT nz)
2054 {
2055  INT nxy=nx*ny,k,k0,j,j0,i,i0;
2056  INT begin_row,end_row,ii,jj;
2057  REAL t,d=0;
2058 
2059 #ifdef _OPENMP
2060 #pragma omp parallel for private(k,k0,j,j0,i,i0,t,begin_row,end_row,ii,jj,d)
2061 #endif
2062  for (k=nbegz; k < nz; k+=2) {
2063  k0= k*nxy;
2064  for (j = nbegy; j < ny; j+=2) {
2065  j0= j*nx;
2066  for (i = nbegx; i < nx; i+=2)
2067  {
2068  i0 = i + j0 + k0;
2069  i0 = mark[i0]-1; //Fortran to C
2070 
2071  if (i0>=0 ) {
2072  t = f[i0];
2073  begin_row = ia[i0], end_row = ia[i0+1];
2074  for (ii = begin_row; ii < end_row; ii ++) {
2075  jj = ja[ii];
2076  if (i0!=jj) t -= aa[ii]*u[jj];
2077  else d = aa[ii];
2078  } // end for ii
2079 
2080  if (ABS(d) > SMALLREAL) u[i0] = t/d;
2081  } // if (i0>=0 )
2082  }
2083  }
2084  }
2085 
2086 }
2087 
2088 /*
2089  * \fn static void rb0f2d (INT *ia, INT *ja, REAL *aa, REAL *u, REAL *f,
2090  * INT *mark, INT nx, INT ny, INT nsweeps)
2091  *
2092  * \brief Colores Gauss-Seidel forward smoother for Au = b
2093  *
2094  * \param ia Pointer to the start location to of row
2095  * \param ja Pointer to the column index of nonzero elements
2096  * \param aa Pointer to the values of the nonzero elements
2097  * \param u Pointer to initial value
2098  * \param f Pointer to right hand
2099  * \param mark Pointer to the order index of nodes
2100  * \param nx Number of nodes in x direction
2101  * \param ny Number of nodes in y direction
2102  * \param nsweeps Number of relaxation sweeps
2103  *
2104  * \author Chunsheng Feng, Zheng Li
2105  * \data 02/06/2012
2106  *
2107  * NOTE: The following code is based on SiPSMG (Simple Poisson Solver based on MultiGrid)
2108  * (c) 2008 Johannes Kraus, Jinchao Xu, Yunrong Zhu, Ludmil Zikatanov
2109  */
2110 static void rb0f2d (INT *ia,
2111  INT *ja,
2112  REAL *aa,
2113  REAL *u,
2114  REAL *f,
2115  INT *mark,
2116  INT nx,
2117  INT ny,
2118  INT nsweeps)
2119 {
2120  INT n0e,n0o,isweep;
2121 
2122  n0e=0;
2123  n0o=1;
2124 
2125  for (isweep = 1; isweep <= nsweeps; isweep++) {
2126  /*... o-o */
2127  swep2df(ia,ja,aa,u,f,n0o,n0o,mark,nx,ny);
2128  /*... o-e */
2129  swep2df(ia,ja,aa,u,f,n0o,n0e,mark,nx,ny);
2130  /*... e-o */
2131  swep2df(ia,ja,aa,u,f,n0e,n0o,mark,nx,ny);
2132  /*... e-e */
2133  swep2df(ia,ja,aa,u,f,n0e,n0e,mark,nx,ny);
2134  }
2135 }
2136 
2137 /*
2138  * \fn static void rb0f3d (INT *ia, INT *ja, REAL *aa,REAL *u, REAL *f, INT *mark,
2139  * INT nx, INT ny, INT nz, INT nsweeps)
2140  *
2141  * \brief Colores Gauss-Seidel forward smoother for Au=b
2142  *
2143  * \param ia Pointer to the start location to of row
2144  * \param ja Pointer to the column index of nonzero elements
2145  * \param aa Pointer to the values of the nonzero elements
2146  * \param u Pointer to initial value
2147  * \param f Pointer to right hand
2148  * \param mark Pointer to the order index of nodes
2149  * \param nx Number of nodes in x direction
2150  * \param ny Number of nodes in y direction
2151  * \param nz Number of nodes in z direction
2152  * \param nsweeps Number of relaxation sweeps
2153  *
2154  * \author Chunsheng Feng, Zheng Li
2155  * \data 02/06/2012
2156  *
2157  * NOTE: The following code is based on SiPSMG (Simple Poisson Solver based on MultiGrid)
2158  * (c) 2008 Johannes Kraus, Jinchao Xu, Yunrong Zhu, Ludmil Zikatanov
2159  */
2160 static void rb0f3d (INT *ia,
2161  INT *ja,
2162  REAL *aa,
2163  REAL *u,
2164  REAL *f,
2165  INT *mark,
2166  INT nx,
2167  INT ny,
2168  INT nz,
2169  INT nsweeps )
2170 {
2171  INT n0e,n0o,isweep;
2172 
2173  n0e=0;
2174  n0o=1;
2175 
2176  for (isweep = 1; isweep <= nsweeps; isweep++) {
2177  /*... o-o-o */
2178  swep3df(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
2179  /*... o-o-e */
2180  swep3df(ia,ja,aa,u,f,n0o,n0o,n0e,mark,nx,ny,nz);
2181  /*... o-e-o */
2182  swep3df(ia,ja,aa,u,f,n0o,n0e,n0o,mark,nx,ny,nz);
2183  /*... o-e-e */
2184  swep3df(ia,ja,aa,u,f,n0o,n0e,n0e,mark,nx,ny,nz);
2185  /*... e-o-o */
2186  swep3df(ia,ja,aa,u,f,n0e,n0o,n0o,mark,nx,ny,nz);
2187  /*... e-o-e */
2188  swep3df(ia,ja,aa,u,f,n0e,n0o,n0e,mark,nx,ny,nz);
2189  /*... e-e-o */
2190  swep3df(ia,ja,aa,u,f,n0e,n0e,n0o,mark,nx,ny,nz);
2191  /*... e-e-e */
2192  swep3df(ia,ja,aa,u,f,n0e,n0e,n0e,mark,nx,ny,nz);
2193  /*... o-o-o */
2194  swep3df(ia,ja,aa,u,f,n0o,n0o,n0o,mark,nx,ny,nz);
2195 
2196  }
2197 }
2198 
2199 #if 0
2200 
2220 static dCSRmat form_contractor (dCSRmat *A,
2221  const INT smoother,
2222  const INT steps,
2223  const INT ndeg,
2224  const REAL relax,
2225  const REAL dtol)
2226 {
2227  const INT n=A->row;
2228  INT i;
2229 
2230  REAL *work = (REAL *)fasp_mem_calloc(2*n,sizeof(REAL));
2231 
2232  dvector b, x;
2233  b.row=x.row=n;
2234  b.val=work; x.val=work+n;
2235 
2236  INT *index = (INT *)fasp_mem_calloc(n,sizeof(INT));
2237 
2238  for (i=0; i<n; ++i) index[i]=i;
2239 
2240  dCSRmat B = fasp_dcsr_create(n, n, n*n); // too much memory required, need to change!!
2241 
2242  dCSRmat C, D;
2243 
2244  for (i=0; i<n; ++i){
2245 
2246  // get i-th column
2247  fasp_dcsr_getcol(i, A, b.val);
2248 
2249  // set x =0.0
2250  fasp_dvec_set(n, &x, 0.0);
2251 
2252  // smooth
2253  switch (smoother) {
2254  case GS:
2255  fasp_smoother_dcsr_gs(&x, 0, n-1, 1, A, &b, steps);
2256  break;
2257  case POLY:
2258  fasp_smoother_dcsr_poly(A, &b, &x, n, ndeg, steps);
2259  break;
2260  case JACOBI:
2261  fasp_smoother_dcsr_jacobi(&x, 0, n-1, 1, A, &b, steps);
2262  break;
2263  case SGS:
2264  fasp_smoother_dcsr_sgs(&x, A, &b, steps);
2265  break;
2266  case SOR:
2267  fasp_smoother_dcsr_sor(&x, 0, n-1, 1, A, &b, steps, relax);
2268  break;
2269  case SSOR:
2270  fasp_smoother_dcsr_sor(&x, 0, n-1, 1, A, &b, steps, relax);
2271  fasp_smoother_dcsr_sor(&x, n-1, 0,-1, A, &b, steps, relax);
2272  break;
2273  case GSOR:
2274  fasp_smoother_dcsr_gs(&x, 0, n-1, 1, A, &b, steps);
2275  fasp_smoother_dcsr_sor(&x, n-1, 0, -1, A, &b, steps, relax);
2276  break;
2277  case SGSOR:
2278  fasp_smoother_dcsr_gs(&x, 0, n-1, 1, A, &b, steps);
2279  fasp_smoother_dcsr_gs(&x, n-1, 0,-1, A, &b, steps);
2280  fasp_smoother_dcsr_sor(&x, 0, n-1, 1, A, &b, steps, relax);
2281  fasp_smoother_dcsr_sor(&x, n-1, 0,-1, A, &b, steps, relax);
2282  break;
2283  default:
2284  printf("### ERROR: Wrong smoother type!\n");
2285  fasp_chkerr(ERROR_INPUT_PAR, __FUNCTION__);
2286  }
2287 
2288  // store to B
2289  B.IA[i] = i*n;
2290  memcpy(&(B.JA[i*n]), index, n*sizeof(INT));
2291  memcpy(&(B.val[i*n]), x.val, x.row*sizeof(REAL));
2292 
2293  }
2294 
2295  B.IA[n] = n*n;
2296 
2297  // drop small entries
2298  compress_dCSRmat(&B, &D, dtol);
2299 
2300  // get contractor
2301  fasp_dcsr_trans(&D, &C);
2302 
2303  // clean up
2304  fasp_mem_free(work);
2305  fasp_dcsr_free(&B);
2306  fasp_dcsr_free(&D);
2307 
2308  return C;
2309 }
2310 #endif
2311 
2312 /*---------------------------------*/
2313 /*-- End of File --*/
2314 /*---------------------------------*/
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
Definition: fasp.h:412
dCSRmat fasp_dcsr_create(const INT m, const INT n, const INT nnz)
Create CSR sparse matrix data memory space.
Definition: sparse_csr.c:34
void fasp_chkerr(const SHORT status, const char *fctname)
Check error status and print out error messages before quit.
Definition: message.c:199
INT * JA
integer array of column indexes, the size is nnz
Definition: fasp.h:163
#define ERROR_INPUT_PAR
Definition: fasp_const.h:31
#define REAL
Definition: fasp.h:67
void fasp_smoother_dcsr_ilu(dCSRmat *A, dvector *b, dvector *x, void *data)
ILU method as a smoother.
void fasp_smoother_dcsr_kaczmarz(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L, const REAL w)
Kaczmarz method as a smoother.
INT fasp_dcsr_trans(dCSRmat *A, dCSRmat *AT)
Find transpose of dCSRmat matrix A.
Definition: sparse_csr.c:826
REAL * val
nonzero entries of A
Definition: fasp.h:166
void fasp_smoother_dcsr_gs_cf(dvector *u, dCSRmat *A, dvector *b, INT L, INT *mark, const INT order)
Gauss-Seidel smoother with C/F ordering for Au=b.
Definition: smoother_csr.c:364
REAL * val
actual vector entries
Definition: fasp.h:348
void * fasp_mem_calloc(LONGLONG size, INT type)
1M = 1024*1024
Definition: memory.c:60
Vector with n entries of REAL type.
Definition: fasp.h:342
#define INT
Definition: fasp.h:64
Data for ILU setup.
Definition: fasp.h:400
INT nwork
work space size
Definition: fasp.h:421
void FASP_GET_START_END(INT procid, INT nprocs, INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: threads.c:83
void fasp_smoother_dcsr_L1diag(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L)
Diagonal scaling (using L1 norm) as a smoother.
void fasp_mem_free(void *mem)
Free up previous allocated memory body.
Definition: memory.c:150
#define FPFIRST
Definition: fasp_const.h:230
#define OPENMP_HOLDS
Definition: fasp_const.h:248
void fasp_dcsr_getcol(const INT n, dCSRmat *A, REAL *col)
Get the n-th column of a CSR matrix A.
Definition: sparse_csr.c:474
REAL * luval
nonzero entries of LU
Definition: fasp.h:415
Main header file for FASP.
#define ERROR_ALLOC_MEM
Definition: fasp_const.h:37
INT row
row number of matrix A, m
Definition: fasp.h:151
void fasp_smoother_dcsr_jacobi(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L)
Jacobi method as a smoother.
Definition: smoother_csr.c:59
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:148
void fasp_blas_array_axpy(const INT n, const REAL a, REAL *x, REAL *y)
y = a*x + y
Definition: blas_array.c:87
INT row
number of rows
Definition: fasp.h:345
void fasp_smoother_dcsr_sor_cf(dvector *u, dCSRmat *A, dvector *b, INT L, const REAL w, INT *mark, const INT order)
SOR smoother with C/F ordering for Au=b.
Definition: smoother_csr.c:873
#define ABS(a)
Definition: fasp.h:74
void fasp_smoother_dcsr_gs_rb3d(dvector *u, dCSRmat *A, dvector *b, INT L, const INT order, INT *mark, const INT maximap, const INT nx, const INT ny, const INT nz)
Colored Gauss-Seidel smoother for Au=b.
void fasp_smoother_dcsr_poly(dCSRmat *Amat, dvector *brhs, dvector *usol, INT n, INT ndeg, INT L)
poly approx to A^{-1} as MG smoother
void fasp_smoother_dcsr_sgs(dvector *u, dCSRmat *A, dvector *b, INT L)
Symmetric Gauss-Seidel method as a smoother.
Definition: smoother_csr.c:629
void fasp_blas_dcsr_aAxpy(const REAL alpha, dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: blas_csr.c:479
void fasp_dvec_set(INT n, dvector *x, REAL val)
Initialize dvector x[i]=val for i=0:n-1.
Definition: vec.c:235
void fasp_array_cp(const INT n, REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: array.c:165
REAL * work
work space
Definition: fasp.h:424
void fasp_smoother_dcsr_gs(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L)
Gauss-Seidel method as a smoother.
Definition: smoother_csr.c:195
void fasp_smoother_dcsr_sor(dvector *u, const INT i_1, const INT i_n, const INT s, dCSRmat *A, dvector *b, INT L, const REAL w)
SOR method as a smoother.
Definition: smoother_csr.c:745
INT * IA
integer array of row pointers, the size is m+1
Definition: fasp.h:160
void fasp_dcsr_free(dCSRmat *A)
Free CSR sparse matrix data memory space.
Definition: sparse_csr.c:166
#define SMALLREAL
Definition: fasp_const.h:238