Fast Auxiliary Space Preconditioning  1.8.4 Feb/15/2016
pvfgmres.c
Go to the documentation of this file.
1 
16 #include <math.h>
17 
18 #include "fasp.h"
19 #include "fasp_functs.h"
20 #include "itsolver_util.inl"
21 
22 /*---------------------------------*/
23 /*-- Public Functions --*/
24 /*---------------------------------*/
25 
55  dvector *b,
56  dvector *x,
57  precond *pc,
58  const REAL tol,
59  const INT MaxIt,
60  const SHORT restart,
61  const SHORT stop_type,
62  const SHORT prtlvl)
63 {
64  const INT n = b->row;
65  const INT min_iter = 0;
66 
67  //--------------------------------------------//
68  // Newly added parameters to monitor when //
69  // to change the restart parameter //
70  //--------------------------------------------//
71  const REAL cr_max = 0.99; // = cos(8^o) (experimental)
72  const REAL cr_min = 0.174; // = cos(80^o) (experimental)
73 
74  // local variables
75  INT iter = 0;
76  INT i,j,k;
77 
78  REAL epsmac = SMALLREAL;
79  REAL r_norm, b_norm, den_norm;
80  REAL epsilon, gamma, t;
81  REAL relres, normu, r_normb;
82 
83  REAL *c = NULL, *s = NULL, *rs = NULL, *norms = NULL, *r = NULL;
84  REAL **p = NULL, **hh = NULL, **z=NULL;
85 
86  REAL cr = 1.0; // convergence rate
87  REAL r_norm_old = 0.0; // save the residual norm of the previous restart cycle
88  INT d = 3; // reduction for the restart parameter
89  INT restart_max = restart; // upper bound for restart in each restart cycle
90  INT restart_min = 3; // lower bound for restart in each restart cycle
91 
92  unsigned INT Restart = restart; // the real restart in some fixed restarted cycle
93  unsigned INT Restart1 = Restart + 1;
94  unsigned LONG worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
95 
96 #if DEBUG_MODE > 0
97  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
98  printf("### DEBUG: maxit = %d, tol = %.4le\n", MaxIt, tol);
99 #endif
100 
101  /* allocate memory and setup temp work space */
102  REAL *work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
103 
104  /* check whether memory is enough for GMRES */
105  while ( (work == NULL) && (Restart > 5) ) {
106  Restart = Restart - 5;
107  worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
108  work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
109  Restart1 = Restart + 1;
110  }
111 
112  if ( work == NULL ) {
113  printf("### ERROR: No enough memory for vFGMRES %s : %s : %d!\n",
114  __FILE__, __FUNCTION__, __LINE__ );
115  exit(ERROR_ALLOC_MEM);
116  }
117 
118  if ( prtlvl > PRINT_MIN && Restart < restart ) {
119  printf("### WARNING: vFGMRES restart number set to %d!\n", Restart);
120  }
121 
122  p = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
123  hh = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
124  z = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
125  norms = (REAL *)fasp_mem_calloc(MaxIt+1, sizeof(REAL));
126 
127  r = work; rs = r + n; c = rs + Restart1; s = c + Restart;
128  for ( i = 0; i < Restart1; i++ ) p[i] = s + Restart + i*n;
129  for ( i = 0; i < Restart1; i++ ) hh[i] = p[Restart] + n + i*Restart;
130  for ( i = 0; i < Restart1; i++ ) z[i] = hh[Restart] + Restart + i*n;
131 
132  /* initialization */
133  fasp_array_cp(n, b->val, p[0]);
134  fasp_blas_dcsr_aAxpy(-1.0, A, x->val, p[0]);
135 
136  b_norm = fasp_blas_array_norm2(n, b->val);
137  r_norm = fasp_blas_array_norm2(n, p[0]);
138  norms[0] = r_norm;
139 
140  if ( prtlvl >= PRINT_SOME) {
141  ITS_PUTNORM("right-hand side", b_norm);
142  ITS_PUTNORM("residual", r_norm);
143  }
144 
145  if ( b_norm > 0.0 ) den_norm = b_norm;
146  else den_norm = r_norm;
147 
148  epsilon = tol*den_norm;
149 
150  // if initial residual is small, no need to iterate!
151  if ( r_norm < epsilon || r_norm < 1e-3*tol ) goto FINISHED;
152 
153  if ( b_norm > 0.0 ) {
154  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,norms[iter],0);
155  }
156  else {
157  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],0);
158  }
159 
160  /* outer iteration cycle */
161  while ( iter < MaxIt ) {
162 
163  rs[0] = r_norm;
164  r_norm_old = r_norm;
165  if ( r_norm == 0.0 ) {
166  fasp_mem_free(work);
167  fasp_mem_free(p);
168  fasp_mem_free(hh);
169  fasp_mem_free(norms);
170  fasp_mem_free(z);
171  return iter;
172  }
173 
174  //-----------------------------------//
175  // adjust the restart parameter //
176  //-----------------------------------//
177 
178  if ( cr > cr_max || iter == 0 ) {
179  Restart = restart_max;
180  }
181  else if ( cr < cr_min ) {
182  // Restart = Restart;
183  }
184  else {
185  if ( Restart - d > restart_min ) {
186  Restart -= d;
187  }
188  else {
189  Restart = restart_max;
190  }
191  }
192 
193  // Always entry the cycle at the first iteration
194  // For at least one iteration step
195  t = 1.0 / r_norm;
196  fasp_blas_array_ax(n, t, p[0]);
197  i = 0;
198 
199  // RESTART CYCLE (right-preconditioning)
200  while ( i < Restart && iter < MaxIt ) {
201 
202  i ++; iter ++;
203 
204  /* apply the preconditioner */
205  if ( pc == NULL )
206  fasp_array_cp(n, p[i-1], z[i-1]);
207  else
208  pc->fct(p[i-1], z[i-1], pc->data);
209 
210  fasp_blas_dcsr_mxv(A, z[i-1], p[i]);
211 
212  /* modified Gram_Schmidt */
213  for ( j = 0; j < i; j++ ) {
214  hh[j][i-1] = fasp_blas_array_dotprod(n, p[j], p[i]);
215  fasp_blas_array_axpy(n, -hh[j][i-1], p[j], p[i]);
216  }
217  t = fasp_blas_array_norm2(n, p[i]);
218  hh[i][i-1] = t;
219  if ( t != 0.0 ) {
220  t = 1.0 / t;
221  fasp_blas_array_ax(n, t, p[i]);
222  }
223 
224  for ( j = 1; j < i; ++j ) {
225  t = hh[j-1][i-1];
226  hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
227  hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
228  }
229  t= hh[i][i-1]*hh[i][i-1];
230  t+= hh[i-1][i-1]*hh[i-1][i-1];
231  gamma = sqrt(t);
232  if (gamma == 0.0) gamma = epsmac;
233  c[i-1] = hh[i-1][i-1] / gamma;
234  s[i-1] = hh[i][i-1] / gamma;
235  rs[i] = -s[i-1]*rs[i-1];
236  rs[i-1] = c[i-1]*rs[i-1];
237  hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
238 
239  r_norm = fabs(rs[i]);
240  norms[iter] = r_norm;
241 
242  if ( b_norm > 0 ) {
243  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,
244  norms[iter],norms[iter]/norms[iter-1]);
245  }
246  else {
247  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],
248  norms[iter]/norms[iter-1]);
249  }
250 
251  /* should we exit the restart cycle? */
252  if (r_norm <= epsilon && iter >= min_iter) break;
253 
254  } /* end of restart cycle */
255 
256  /* now compute solution, first solve upper triangular system */
257 
258  rs[i-1] = rs[i-1] / hh[i-1][i-1];
259  for ( k = i-2; k >= 0; k-- ) {
260  t = 0.0;
261  for (j = k+1; j < i; j ++) t -= hh[k][j]*rs[j];
262 
263  t += rs[k];
264  rs[k] = t / hh[k][k];
265  }
266 
267  fasp_array_cp(n, z[i-1], r);
268  fasp_blas_array_ax(n, rs[i-1], r);
269 
270  for ( j = i-2; j >= 0; j-- ) fasp_blas_array_axpy(n, rs[j], z[j], r);
271 
272  fasp_blas_array_axpy(n, 1.0, r, x->val);
273 
274  if ( r_norm <= epsilon && iter >= min_iter ) {
275  fasp_array_cp(n, b->val, r);
276  fasp_blas_dcsr_aAxpy(-1.0, A, x->val, r);
277  r_norm = fasp_blas_array_norm2(n, r);
278 
279  switch (stop_type) {
280  case STOP_REL_RES:
281  relres = r_norm/den_norm;
282  break;
283  case STOP_REL_PRECRES:
284  if ( pc == NULL )
285  fasp_array_cp(n, r, p[0]);
286  else
287  pc->fct(r, p[0], pc->data);
288  r_normb = sqrt(fasp_blas_array_dotprod(n,p[0],r));
289  relres = r_normb/den_norm;
290  break;
291  case STOP_MOD_REL_RES:
292  normu = MAX(SMALLREAL,fasp_blas_array_norm2(n,x->val));
293  relres = r_norm/normu;
294  break;
295  default:
296  printf("### ERROR: Unrecognised stopping type for %s!\n", __FUNCTION__);
297  goto FINISHED;
298  }
299 
300  if ( relres <= tol ) {
301  break;
302  }
303  else {
304  if ( prtlvl >= PRINT_SOME ) ITS_FACONV;
305  fasp_array_cp(n, r, p[0]); i = 0;
306  }
307 
308  } /* end of convergence check */
309 
310  /* compute residual vector and continue loop */
311  for ( j = i; j > 0; j-- ) {
312  rs[j-1] = -s[j-1]*rs[j];
313  rs[j] = c[j-1]*rs[j];
314  }
315 
316  if (i) fasp_blas_array_axpy(n, rs[i]-1.0, p[i], p[i]);
317 
318  for ( j = i-1; j > 0; j-- ) fasp_blas_array_axpy(n, rs[j], p[j], p[i]);
319 
320  if (i) {
321  fasp_blas_array_axpy(n, rs[0]-1.0, p[0], p[0]);
322  fasp_blas_array_axpy(n, 1.0, p[i], p[0]);
323  }
324 
325  //-----------------------------------//
326  // compute the convergence rate //
327  //-----------------------------------//
328  cr = r_norm / r_norm_old;
329 
330  } /* end of iteration while loop */
331 
332  if ( prtlvl > PRINT_NONE ) ITS_FINAL(iter,MaxIt,r_norm/den_norm);
333 
334 FINISHED:
335  /*-------------------------------------------
336  * Free some stuff
337  *------------------------------------------*/
338  fasp_mem_free(work);
339  fasp_mem_free(p);
340  fasp_mem_free(hh);
341  fasp_mem_free(norms);
342  fasp_mem_free(z);
343 
344 #if DEBUG_MODE > 0
345  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
346 #endif
347 
348  if ( iter >= MaxIt )
349  return ERROR_SOLVER_MAXIT;
350  else
351  return iter;
352 }
353 
383  dvector *b,
384  dvector *x,
385  precond *pc,
386  const REAL tol,
387  const INT MaxIt,
388  const SHORT restart,
389  const SHORT stop_type,
390  const SHORT prtlvl)
391 {
392  const INT n = b->row;
393  const INT min_iter = 0;
394 
395  //--------------------------------------------//
396  // Newly added parameters to monitor when //
397  // to change the restart parameter //
398  //--------------------------------------------//
399  const REAL cr_max = 0.99; // = cos(8^o) (experimental)
400  const REAL cr_min = 0.174; // = cos(80^o) (experimental)
401 
402  // local variables
403  INT iter = 0;
404  INT i,j,k;
405 
406  REAL epsmac = SMALLREAL;
407  REAL r_norm, b_norm, den_norm;
408  REAL epsilon, gamma, t;
409  REAL relres, normu, r_normb;
410 
411  REAL *c = NULL, *s = NULL, *rs = NULL, *norms = NULL, *r = NULL;
412  REAL **p = NULL, **hh = NULL, **z=NULL;
413 
414  REAL cr = 1.0; // convergence rate
415  REAL r_norm_old = 0.0; // save the residual norm of the previous restart cycle
416  INT d = 3; // reduction for the restart parameter
417  INT restart_max = restart; // upper bound for restart in each restart cycle
418  INT restart_min = 3; // lower bound for restart in each restart cycle
419 
420  unsigned INT Restart = restart; // the real restart in some fixed restarted cycle
421  unsigned INT Restart1 = Restart + 1;
422  unsigned LONG worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
423 
424 #if DEBUG_MODE > 0
425  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
426  printf("### DEBUG: maxit = %d, tol = %.4le\n", MaxIt, tol);
427 #endif
428 
429  /* allocate memory and setup temp work space */
430  REAL *work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
431 
432  /* check whether memory is enough for GMRES */
433  while ( (work == NULL) && (Restart > 5) ) {
434  Restart = Restart - 5;
435  worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
436  work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
437  Restart1 = Restart + 1;
438  }
439 
440  if ( work == NULL ) {
441  printf("### ERROR: No enough memory for vFGMRES %s : %s : %d!\n",
442  __FILE__, __FUNCTION__, __LINE__ );
443  exit(ERROR_ALLOC_MEM);
444  }
445 
446  if ( prtlvl > PRINT_MIN && Restart < restart ) {
447  printf("### WARNING: vFGMRES restart number set to %d!\n", Restart);
448  }
449 
450  p = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
451  hh = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
452  z = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
453  norms = (REAL *)fasp_mem_calloc(MaxIt+1, sizeof(REAL));
454 
455  r = work; rs = r + n; c = rs + Restart1; s = c + Restart;
456  for ( i = 0; i < Restart1; i++ ) p[i] = s + Restart + i*n;
457  for ( i = 0; i < Restart1; i++ ) hh[i] = p[Restart] + n + i*Restart;
458  for ( i = 0; i < Restart1; i++ ) z[i] = hh[Restart] + Restart + i*n;
459 
460  /* initialization */
461  fasp_array_cp(n, b->val, p[0]);
462  fasp_blas_dbsr_aAxpy(-1.0, A, x->val, p[0]);
463 
464  b_norm = fasp_blas_array_norm2(n, b->val);
465  r_norm = fasp_blas_array_norm2(n, p[0]);
466  norms[0] = r_norm;
467 
468  if ( prtlvl >= PRINT_SOME) {
469  ITS_PUTNORM("right-hand side", b_norm);
470  ITS_PUTNORM("residual", r_norm);
471  }
472 
473  if ( b_norm > 0.0 ) den_norm = b_norm;
474  else den_norm = r_norm;
475 
476  epsilon = tol*den_norm;
477 
478  // if initial residual is small, no need to iterate!
479  if ( r_norm < epsilon || r_norm < 1e-3*tol ) goto FINISHED;
480 
481  if ( b_norm > 0.0 ) {
482  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,norms[iter],0);
483  }
484  else {
485  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],0);
486  }
487 
488  /* outer iteration cycle */
489  while ( iter < MaxIt ) {
490 
491  rs[0] = r_norm;
492  r_norm_old = r_norm;
493  if ( r_norm == 0.0 ) {
494  fasp_mem_free(work);
495  fasp_mem_free(p);
496  fasp_mem_free(hh);
497  fasp_mem_free(norms);
498  fasp_mem_free(z);
499  return iter;
500  }
501 
502  //-----------------------------------//
503  // adjust the restart parameter //
504  //-----------------------------------//
505 
506  if ( cr > cr_max || iter == 0 ) {
507  Restart = restart_max;
508  }
509  else if ( cr < cr_min ) {
510  // Restart = Restart;
511  }
512  else {
513  if ( Restart - d > restart_min ) {
514  Restart -= d;
515  }
516  else {
517  Restart = restart_max;
518  }
519  }
520 
521  // Always entry the cycle at the first iteration
522  // For at least one iteration step
523  t = 1.0 / r_norm;
524  fasp_blas_array_ax(n, t, p[0]);
525  i = 0;
526 
527  // RESTART CYCLE (right-preconditioning)
528  while ( i < Restart && iter < MaxIt ) {
529 
530  i ++; iter ++;
531 
532  /* apply the preconditioner */
533  if ( pc == NULL )
534  fasp_array_cp(n, p[i-1], z[i-1]);
535  else
536  pc->fct(p[i-1], z[i-1], pc->data);
537 
538  fasp_blas_dbsr_mxv(A, z[i-1], p[i]);
539 
540  /* modified Gram_Schmidt */
541  for ( j = 0; j < i; j++ ) {
542  hh[j][i-1] = fasp_blas_array_dotprod(n, p[j], p[i]);
543  fasp_blas_array_axpy(n, -hh[j][i-1], p[j], p[i]);
544  }
545  t = fasp_blas_array_norm2(n, p[i]);
546  hh[i][i-1] = t;
547  if ( t != 0.0 ) {
548  t = 1.0 / t;
549  fasp_blas_array_ax(n, t, p[i]);
550  }
551 
552  for ( j = 1; j < i; ++j ) {
553  t = hh[j-1][i-1];
554  hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
555  hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
556  }
557  t= hh[i][i-1]*hh[i][i-1];
558  t+= hh[i-1][i-1]*hh[i-1][i-1];
559  gamma = sqrt(t);
560  if (gamma == 0.0) gamma = epsmac;
561  c[i-1] = hh[i-1][i-1] / gamma;
562  s[i-1] = hh[i][i-1] / gamma;
563  rs[i] = -s[i-1]*rs[i-1];
564  rs[i-1] = c[i-1]*rs[i-1];
565  hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
566 
567  r_norm = fabs(rs[i]);
568  norms[iter] = r_norm;
569 
570  if ( b_norm > 0 ) {
571  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,
572  norms[iter],norms[iter]/norms[iter-1]);
573  }
574  else {
575  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],
576  norms[iter]/norms[iter-1]);
577  }
578 
579  /* should we exit the restart cycle? */
580  if (r_norm <= epsilon && iter >= min_iter) break;
581 
582  } /* end of restart cycle */
583 
584  /* now compute solution, first solve upper triangular system */
585 
586  rs[i-1] = rs[i-1] / hh[i-1][i-1];
587  for ( k = i-2; k >= 0; k-- ) {
588  t = 0.0;
589  for (j = k+1; j < i; j ++) t -= hh[k][j]*rs[j];
590 
591  t += rs[k];
592  rs[k] = t / hh[k][k];
593  }
594 
595  fasp_array_cp(n, z[i-1], r);
596  fasp_blas_array_ax(n, rs[i-1], r);
597 
598  for ( j = i-2; j >= 0; j-- ) fasp_blas_array_axpy(n, rs[j], z[j], r);
599 
600  fasp_blas_array_axpy(n, 1.0, r, x->val);
601 
602  if ( r_norm <= epsilon && iter >= min_iter ) {
603  fasp_array_cp(n, b->val, r);
604  fasp_blas_dbsr_aAxpy(-1.0, A, x->val, r);
605  r_norm = fasp_blas_array_norm2(n, r);
606 
607  switch (stop_type) {
608  case STOP_REL_RES:
609  relres = r_norm/den_norm;
610  break;
611  case STOP_REL_PRECRES:
612  if ( pc == NULL )
613  fasp_array_cp(n, r, p[0]);
614  else
615  pc->fct(r, p[0], pc->data);
616  r_normb = sqrt(fasp_blas_array_dotprod(n,p[0],r));
617  relres = r_normb/den_norm;
618  break;
619  case STOP_MOD_REL_RES:
620  normu = MAX(SMALLREAL,fasp_blas_array_norm2(n,x->val));
621  relres = r_norm/normu;
622  break;
623  default:
624  printf("### ERROR: Unrecognised stopping type for %s!\n", __FUNCTION__);
625  goto FINISHED;
626  }
627 
628  if ( relres <= tol ) {
629  break;
630  }
631  else {
632  if ( prtlvl >= PRINT_SOME ) ITS_FACONV;
633  fasp_array_cp(n, r, p[0]); i = 0;
634  }
635 
636  } /* end of convergence check */
637 
638  /* compute residual vector and continue loop */
639  for ( j = i; j > 0; j-- ) {
640  rs[j-1] = -s[j-1]*rs[j];
641  rs[j] = c[j-1]*rs[j];
642  }
643 
644  if (i) fasp_blas_array_axpy(n, rs[i]-1.0, p[i], p[i]);
645 
646  for ( j = i-1; j > 0; j-- ) fasp_blas_array_axpy(n, rs[j], p[j], p[i]);
647 
648  if (i) {
649  fasp_blas_array_axpy(n, rs[0]-1.0, p[0], p[0]);
650  fasp_blas_array_axpy(n, 1.0, p[i], p[0]);
651  }
652 
653  //-----------------------------------//
654  // compute the convergence rate //
655  //-----------------------------------//
656  cr = r_norm / r_norm_old;
657 
658  } /* end of iteration while loop */
659 
660  if ( prtlvl > PRINT_NONE ) ITS_FINAL(iter,MaxIt,r_norm/den_norm);
661 
662 FINISHED:
663  /*-------------------------------------------
664  * Free some stuff
665  *------------------------------------------*/
666  fasp_mem_free(work);
667  fasp_mem_free(p);
668  fasp_mem_free(hh);
669  fasp_mem_free(norms);
670  fasp_mem_free(z);
671 
672 #if DEBUG_MODE > 0
673  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
674 #endif
675 
676  if ( iter >= MaxIt )
677  return ERROR_SOLVER_MAXIT;
678  else
679  return iter;
680 }
681 
713  dvector *b,
714  dvector *x,
715  precond *pc,
716  const REAL tol,
717  const INT MaxIt,
718  const SHORT restart,
719  const SHORT stop_type,
720  const SHORT prtlvl)
721 {
722  const INT n = b->row;
723  const INT min_iter = 0;
724 
725  //--------------------------------------------//
726  // Newly added parameters to monitor when //
727  // to change the restart parameter //
728  //--------------------------------------------//
729  const REAL cr_max = 0.99; // = cos(8^o) (experimental)
730  const REAL cr_min = 0.174; // = cos(80^o) (experimental)
731 
732  // local variables
733  INT iter = 0;
734  INT i,j,k;
735 
736  REAL epsmac = SMALLREAL;
737  REAL r_norm, b_norm, den_norm;
738  REAL epsilon, gamma, t;
739  REAL relres, normu, r_normb;
740 
741  REAL *c = NULL, *s = NULL, *rs = NULL, *norms = NULL, *r = NULL;
742  REAL **p = NULL, **hh = NULL, **z=NULL;
743 
744  REAL cr = 1.0; // convergence rate
745  REAL r_norm_old = 0.0; // save the residual norm of the previous restart cycle
746  INT d = 3; // reduction for the restart parameter
747  INT restart_max = restart; // upper bound for restart in each restart cycle
748  INT restart_min = 3; // lower bound for restart in each restart cycle
749 
750  unsigned INT Restart = restart; // the real restart in some fixed restarted cycle
751  unsigned INT Restart1 = Restart + 1;
752  unsigned LONG worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
753 
754 #if DEBUG_MODE > 0
755  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
756  printf("### DEBUG: maxit = %d, tol = %.4le\n", MaxIt, tol);
757 #endif
758 
759  /* allocate memory and setup temp work space */
760  REAL *work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
761 
762  /* check whether memory is enough for GMRES */
763  while ( (work == NULL) && (Restart > 5) ) {
764  Restart = Restart - 5;
765  worksize = (Restart+4)*(Restart+n)+1-n+Restart*n;
766  work = (REAL *) fasp_mem_calloc(worksize, sizeof(REAL));
767  Restart1 = Restart + 1;
768  }
769 
770  if ( work == NULL ) {
771  printf("### ERROR: No enough memory for vFGMRES %s : %s : %d!\n",
772  __FILE__, __FUNCTION__, __LINE__ );
773  exit(ERROR_ALLOC_MEM);
774  }
775 
776  if ( prtlvl > PRINT_MIN && Restart < restart ) {
777  printf("### WARNING: vFGMRES restart number set to %d!\n", Restart);
778  }
779 
780  p = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
781  hh = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
782  z = (REAL **)fasp_mem_calloc(Restart1, sizeof(REAL *));
783  norms = (REAL *)fasp_mem_calloc(MaxIt+1, sizeof(REAL));
784 
785  r = work; rs = r + n; c = rs + Restart1; s = c + Restart;
786  for ( i = 0; i < Restart1; i++ ) p[i] = s + Restart + i*n;
787  for ( i = 0; i < Restart1; i++ ) hh[i] = p[Restart] + n + i*Restart;
788  for ( i = 0; i < Restart1; i++ ) z[i] = hh[Restart] + Restart + i*n;
789 
790  /* initialization */
791  fasp_array_cp(n, b->val, p[0]);
792  fasp_blas_bdcsr_aAxpy(-1.0, A, x->val, p[0]);
793 
794  b_norm = fasp_blas_array_norm2(n, b->val);
795  r_norm = fasp_blas_array_norm2(n, p[0]);
796  norms[0] = r_norm;
797 
798  if ( prtlvl >= PRINT_SOME) {
799  ITS_PUTNORM("right-hand side", b_norm);
800  ITS_PUTNORM("residual", r_norm);
801  }
802 
803  if ( b_norm > 0.0 ) den_norm = b_norm;
804  else den_norm = r_norm;
805 
806  epsilon = tol*den_norm;
807 
808  // if initial residual is small, no need to iterate!
809  if ( r_norm < epsilon || r_norm < 1e-3*tol ) goto FINISHED;
810 
811  if ( b_norm > 0.0 ) {
812  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,norms[iter],0);
813  }
814  else {
815  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],0);
816  }
817 
818  /* outer iteration cycle */
819  while ( iter < MaxIt ) {
820 
821  rs[0] = r_norm;
822  r_norm_old = r_norm;
823  if ( r_norm == 0.0 ) {
824  fasp_mem_free(work);
825  fasp_mem_free(p);
826  fasp_mem_free(hh);
827  fasp_mem_free(norms);
828  fasp_mem_free(z);
829  return iter;
830  }
831 
832  //-----------------------------------//
833  // adjust the restart parameter //
834  //-----------------------------------//
835 
836  if ( cr > cr_max || iter == 0 ) {
837  Restart = restart_max;
838  }
839  else if ( cr < cr_min ) {
840  // Restart = Restart;
841  }
842  else {
843  if ( Restart - d > restart_min ) {
844  Restart -= d;
845  }
846  else {
847  Restart = restart_max;
848  }
849  }
850 
851  // Always entry the cycle at the first iteration
852  // For at least one iteration step
853  t = 1.0 / r_norm;
854  fasp_blas_array_ax(n, t, p[0]);
855  i = 0;
856 
857  // RESTART CYCLE (right-preconditioning)
858  while ( i < Restart && iter < MaxIt ) {
859 
860  i ++; iter ++;
861 
862  /* apply the preconditioner */
863  if ( pc == NULL )
864  fasp_array_cp(n, p[i-1], z[i-1]);
865  else
866  pc->fct(p[i-1], z[i-1], pc->data);
867 
868  fasp_blas_bdcsr_mxv(A, z[i-1], p[i]);
869 
870  /* modified Gram_Schmidt */
871  for ( j = 0; j < i; j++ ) {
872  hh[j][i-1] = fasp_blas_array_dotprod(n, p[j], p[i]);
873  fasp_blas_array_axpy(n, -hh[j][i-1], p[j], p[i]);
874  }
875  t = fasp_blas_array_norm2(n, p[i]);
876  hh[i][i-1] = t;
877  if ( t != 0.0 ) {
878  t = 1.0 / t;
879  fasp_blas_array_ax(n, t, p[i]);
880  }
881 
882  for ( j = 1; j < i; ++j ) {
883  t = hh[j-1][i-1];
884  hh[j-1][i-1] = s[j-1]*hh[j][i-1] + c[j-1]*t;
885  hh[j][i-1] = -s[j-1]*t + c[j-1]*hh[j][i-1];
886  }
887  t= hh[i][i-1]*hh[i][i-1];
888  t+= hh[i-1][i-1]*hh[i-1][i-1];
889  gamma = sqrt(t);
890  if (gamma == 0.0) gamma = epsmac;
891  c[i-1] = hh[i-1][i-1] / gamma;
892  s[i-1] = hh[i][i-1] / gamma;
893  rs[i] = -s[i-1]*rs[i-1];
894  rs[i-1] = c[i-1]*rs[i-1];
895  hh[i-1][i-1] = s[i-1]*hh[i][i-1] + c[i-1]*hh[i-1][i-1];
896 
897  r_norm = fabs(rs[i]);
898  norms[iter] = r_norm;
899 
900  if ( b_norm > 0 ) {
901  print_itinfo(prtlvl,stop_type,iter,norms[iter]/b_norm,
902  norms[iter],norms[iter]/norms[iter-1]);
903  }
904  else {
905  print_itinfo(prtlvl,stop_type,iter,norms[iter],norms[iter],
906  norms[iter]/norms[iter-1]);
907  }
908 
909  /* should we exit the restart cycle? */
910  if (r_norm <= epsilon && iter >= min_iter) break;
911 
912  } /* end of restart cycle */
913 
914  /* now compute solution, first solve upper triangular system */
915 
916  rs[i-1] = rs[i-1] / hh[i-1][i-1];
917  for ( k = i-2; k >= 0; k-- ) {
918  t = 0.0;
919  for (j = k+1; j < i; j ++) t -= hh[k][j]*rs[j];
920 
921  t += rs[k];
922  rs[k] = t / hh[k][k];
923  }
924 
925  fasp_array_cp(n, z[i-1], r);
926  fasp_blas_array_ax(n, rs[i-1], r);
927 
928  for ( j = i-2; j >= 0; j-- ) fasp_blas_array_axpy(n, rs[j], z[j], r);
929 
930  fasp_blas_array_axpy(n, 1.0, r, x->val);
931 
932  if ( r_norm <= epsilon && iter >= min_iter ) {
933  fasp_array_cp(n, b->val, r);
934  fasp_blas_bdcsr_aAxpy(-1.0, A, x->val, r);
935  r_norm = fasp_blas_array_norm2(n, r);
936 
937  switch (stop_type) {
938  case STOP_REL_RES:
939  relres = r_norm/den_norm;
940  break;
941  case STOP_REL_PRECRES:
942  if ( pc == NULL )
943  fasp_array_cp(n, r, p[0]);
944  else
945  pc->fct(r, p[0], pc->data);
946  r_normb = sqrt(fasp_blas_array_dotprod(n,p[0],r));
947  relres = r_normb/den_norm;
948  break;
949  case STOP_MOD_REL_RES:
950  normu = MAX(SMALLREAL,fasp_blas_array_norm2(n,x->val));
951  relres = r_norm/normu;
952  break;
953  default:
954  printf("### ERROR: Unrecognised stopping type for %s!\n", __FUNCTION__);
955  goto FINISHED;
956  }
957 
958  if ( relres <= tol ) {
959  break;
960  }
961  else {
962  if ( prtlvl >= PRINT_SOME ) ITS_FACONV;
963  fasp_array_cp(n, r, p[0]); i = 0;
964  }
965 
966  } /* end of convergence check */
967 
968  /* compute residual vector and continue loop */
969  for ( j = i; j > 0; j-- ) {
970  rs[j-1] = -s[j-1]*rs[j];
971  rs[j] = c[j-1]*rs[j];
972  }
973 
974  if (i) fasp_blas_array_axpy(n, rs[i]-1.0, p[i], p[i]);
975 
976  for ( j = i-1; j > 0; j-- ) fasp_blas_array_axpy(n, rs[j], p[j], p[i]);
977 
978  if (i) {
979  fasp_blas_array_axpy(n, rs[0]-1.0, p[0], p[0]);
980  fasp_blas_array_axpy(n, 1.0, p[i], p[0]);
981  }
982 
983  //-----------------------------------//
984  // compute the convergence rate //
985  //-----------------------------------//
986  cr = r_norm / r_norm_old;
987 
988  } /* end of iteration while loop */
989 
990  if ( prtlvl > PRINT_NONE ) ITS_FINAL(iter,MaxIt,r_norm/den_norm);
991 
992 FINISHED:
993  /*-------------------------------------------
994  * Free some stuff
995  *------------------------------------------*/
996  fasp_mem_free(work);
997  fasp_mem_free(p);
998  fasp_mem_free(hh);
999  fasp_mem_free(norms);
1000  fasp_mem_free(z);
1001 
1002 #if DEBUG_MODE > 0
1003  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
1004 #endif
1005 
1006  if ( iter >= MaxIt )
1007  return ERROR_SOLVER_MAXIT;
1008  else
1009  return iter;
1010 }
1011 
1012 /*---------------------------------*/
1013 /*-- End of File --*/
1014 /*---------------------------------*/
#define REAL
Definition: fasp.h:67
void fasp_blas_bdcsr_mxv(block_dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: blas_bcsr.c:155
void fasp_blas_bdcsr_aAxpy(const REAL alpha, block_dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: blas_bcsr.c:30
#define STOP_MOD_REL_RES
Definition: fasp_const.h:133
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:44
REAL * val
actual vector entries
Definition: fasp.h:348
void * fasp_mem_calloc(LONGLONG size, INT type)
1M = 1024*1024
Definition: memory.c:60
void(* fct)(REAL *, REAL *, void *)
action for preconditioner, void function pointer
Definition: fasp.h:1005
Vector with n entries of REAL type.
Definition: fasp.h:342
#define ERROR_SOLVER_MAXIT
Definition: fasp_const.h:54
#define PRINT_SOME
Definition: fasp_const.h:81
#define INT
Definition: fasp.h:64
void print_itinfo(const INT ptrlvl, const INT stop_type, const INT iter, const REAL relres, const REAL absres, const REAL factor)
Print out iteration information for iterative solvers.
Definition: message.c:36
Preconditioner data and action.
Definition: fasp.h:999
void fasp_mem_free(void *mem)
Free up previous allocated memory body.
Definition: memory.c:150
void * data
data for preconditioner, void pointer
Definition: fasp.h:1002
#define STOP_REL_PRECRES
Definition: fasp_const.h:132
Main header file for FASP.
#define ERROR_ALLOC_MEM
Definition: fasp_const.h:37
#define PRINT_NONE
Print level for all subroutines – not including DEBUG output.
Definition: fasp_const.h:79
Sparse matrix of REAL type in CSR format.
Definition: fasp.h:148
void fasp_blas_array_axpy(const INT n, const REAL a, REAL *x, REAL *y)
y = a*x + y
Definition: blas_array.c:87
#define STOP_REL_RES
Definition of iterative solver stopping criteria types.
Definition: fasp_const.h:131
INT row
number of rows
Definition: fasp.h:345
#define LONG
Definition: fasp.h:65
Block REAL CSR matrix format.
Definition: fasp_block.h:84
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
void fasp_blas_dbsr_mxv(dBSRmat *A, REAL *x, REAL *y)
Compute y := A*x.
Definition: blas_bsr.c:895
REAL fasp_blas_array_dotprod(const INT n, const REAL *x, const REAL *y)
Inner product of two arraies (x,y)
Definition: blas_array.c:267
REAL fasp_blas_array_norm2(const INT n, const REAL *x)
L2 norm of array x.
Definition: blas_array.c:347
INT fasp_solver_dbsr_pvfgmres(dBSRmat *A, dvector *b, dvector *x, precond *pc, const REAL tol, const INT MaxIt, const SHORT restart, const SHORT stop_type, const SHORT prtlvl)
Solve "Ax=b" using PFGMRES(right preconditioned) iterative method in which the restart parameter can ...
Definition: pvfgmres.c:382
INT fasp_solver_bdcsr_pvfgmres(block_dCSRmat *A, dvector *b, dvector *x, precond *pc, const REAL tol, const INT MaxIt, const SHORT restart, const SHORT stop_type, const SHORT prtlvl)
Solve "Ax=b" using PFGMRES (right preconditioned) iterative method in which the restart parameter can...
Definition: pvfgmres.c:712
void fasp_blas_array_ax(const INT n, const REAL a, REAL *x)
x = a*x
Definition: blas_array.c:35
void fasp_blas_dcsr_aAxpy(const REAL alpha, dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = alpha*A*x + y.
Definition: blas_csr.c:479
void fasp_array_cp(const INT n, REAL *x, REAL *y)
Copy an array to the other y=x.
Definition: array.c:165
#define MAX(a, b)
Definition of max, min, abs.
Definition: fasp.h:72
void fasp_blas_dbsr_aAxpy(const REAL alpha, dBSRmat *A, REAL *x, REAL *y)
Compute y := alpha*A*x + y.
Definition: blas_bsr.c:337
INT fasp_solver_dcsr_pvfgmres(dCSRmat *A, dvector *b, dvector *x, precond *pc, const REAL tol, const INT MaxIt, const SHORT restart, const SHORT stop_type, const SHORT prtlvl)
Solve "Ax=b" using PFGMRES(right preconditioned) iterative method in which the restart parameter can ...
Definition: pvfgmres.c:54
#define SMALLREAL
Definition: fasp_const.h:238
#define PRINT_MIN
Definition: fasp_const.h:80
void fasp_blas_dcsr_mxv(dCSRmat *A, REAL *x, REAL *y)
Matrix-vector multiplication y = A*x.
Definition: blas_csr.c:225