Fast Auxiliary Space Preconditioning  1.8.4 Feb/15/2016
ilu_setup_bsr.c
Go to the documentation of this file.
1 
6 #include <math.h>
7 #include <time.h>
8 
9 #include "fasp.h"
10 #include "fasp_functs.h"
11 
12 /* The following functions are defined in ilu.f */
13 #ifdef __cplusplus
14 extern "C" {void symbfactor_(const INT *n,INT *colind,INT *rwptr,
15  const INT *levfill,const INT *nzmax,
16  INT *nzlu,INT *ijlu,INT *uptr,INT *ierr);}
17 #else
18 extern void symbfactor_(const INT *n,INT *colind,INT *rwptr,const INT *levfill,
19  const INT *nzmax,INT *nzlu,INT *ijlu,INT *uptr,INT *ierr);
20 #endif
21 
22 static INT numfac_bsr(dBSRmat *A, REAL *luval, INT *jlu, INT *uptr);
23 
24 /*---------------------------------*/
25 /*-- Public Functions --*/
26 /*---------------------------------*/
27 
46  ILU_data *iludata,
47  ILU_param *iluparam)
48 {
49 
50  const SHORT prtlvl = iluparam->print_level;
51  const INT n = A->COL, nnz = A->NNZ, nb = A->nb, nb2 = nb*nb;
52 
53  // local variables
54  INT lfil=iluparam->ILU_lfil;
55  INT ierr, iwk, nzlu, nwork, *ijlu, *uptr;
56 
57  REAL setup_start, setup_end, setup_duration;
58  SHORT status = FASP_SUCCESS;
59 
60 #if DEBUG_MODE > 0
61  printf("### DEBUG: %s ...... [Start]\n", __FUNCTION__);
62  printf("### DEBUG: m=%d, n=%d, nnz=%d\n", A->ROW, n, nnz);
63 #endif
64 
65  fasp_gettime(&setup_start);
66 
67  // Expected amount of memory for ILU needed and allocate memory
68  iwk = (lfil+2)*nnz;
69 
70  // setup preconditioner
71  iludata->row = iludata->col=n;
72  iludata->nb = nb;
73 
74  ijlu = (INT*)fasp_mem_calloc(iwk,sizeof(INT));
75  uptr = (INT*)fasp_mem_calloc(A->ROW,sizeof(INT));
76 
77 #if DEBUG_MODE > 1
78  printf("### DEBUG: symbolic factorization ... \n ");
79 #endif
80 
81  // ILU decomposition
82  // (1) symbolic factoration
83  symbfactor_(&A->ROW,A->JA,A->IA,&lfil,&iwk,&nzlu,ijlu,uptr,&ierr);
84 
85  iludata->luval = (REAL*)fasp_mem_calloc(nzlu*nb2,sizeof(REAL));
86 
87 #if DEBUG_MODE > 1
88  printf("### DEBUG: numerical factorization ... \n ");
89 #endif
90 
91  // (2) numerical factoration
92  numfac_bsr(A, iludata->luval, ijlu, uptr);
93 
94  //nwork = 6*nzlu*nb;
95  nwork = 5*A->ROW*A->nb;
96  iludata->nzlu = nzlu;
97  iludata->nwork = nwork;
98  iludata->ijlu = (INT*)fasp_mem_calloc(nzlu,sizeof(INT));
99 
100  memcpy(iludata->ijlu,ijlu,nzlu*sizeof(INT));
101  iludata->work = (REAL*)fasp_mem_calloc(nwork, sizeof(REAL));
102  // Check: Is the work space too large? --Xiaozhe
103 
104 #if DEBUG_MODE > 1
105  printf("### DEBUG: fill-in = %d, nwork = %d\n", lfil, nwork);
106  printf("### DEBUG: iwk = %d, nzlu = %d\n",iwk,nzlu);
107 #endif
108 
109  if ( ierr != 0 ) {
110  printf("### ERROR: ILU setup failed (ierr=%d)!\n", ierr);
111  status = ERROR_SOLVER_ILUSETUP;
112  goto FINISHED;
113  }
114 
115  if ( iwk < nzlu ) {
116  printf("### ERROR: Need more memory for ILU %d!\n", iwk-nzlu);
117  status = ERROR_SOLVER_ILUSETUP;
118  goto FINISHED;
119  }
120 
121  if ( prtlvl > PRINT_NONE ) {
122  fasp_gettime(&setup_end);
123  setup_duration = setup_end - setup_start;
124  printf("BSR ILU(%d) setup costs %f seconds.\n", lfil,setup_duration);
125  }
126 
127  FINISHED:
128  fasp_mem_free(ijlu);
129  fasp_mem_free(uptr);
130 
131 #if DEBUG_MODE > 0
132  printf("### DEBUG: %s ...... [Finish]\n", __FUNCTION__);
133 #endif
134 
135  return status;
136 }
137 
138 /*---------------------------------*/
139 /*-- Private Functions --*/
140 /*---------------------------------*/
141 
156 static INT numfac_bsr (dBSRmat *A,
157  REAL *luval,
158  INT *jlu,
159  INT *uptr)
160 {
161  INT n=A->ROW,nb=A->nb, nb2=nb*nb, ib, ibstart,ibstart1;
162  INT k, indj, inds, indja,jluj, jlus, ijaj;
163  REAL *mult,*mult1;
164  INT *colptrs;
165  INT status=FASP_SUCCESS;
166 
167  colptrs=(INT*)fasp_mem_calloc(n,sizeof(INT));
168  mult=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
169  mult1=(REAL*)fasp_mem_calloc(nb2,sizeof(REAL));
170 
185  //for (k=0;k<n;k++) colptrs[k]=0;
186  memset(colptrs, 0, sizeof(INT)*n);
187 
188  switch (nb) {
189 
190  case 1:
191 
192  for (k = 0; k < n; ++k) {
193 
194  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
195  colptrs[jlu[indj]] = indj;
196  ibstart=indj*nb2;
197  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
198  }
199 
200  colptrs[k] = k;
201 
202  for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
203  ijaj = A->JA[indja];
204  ibstart=colptrs[ijaj]*nb2;
205  ibstart1=indja*nb2;
206  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
207  }
208 
209  for (indj = jlu[k]; indj < uptr[k]; ++indj) {
210 
211  jluj = jlu[indj];
212 
213  luval[indj] = luval[indj]*luval[jluj];
214  mult[0] = luval[indj];
215 
216  for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
217  jlus = jlu[inds];
218  if (colptrs[jlus] != 0)
219  luval[colptrs[jlus]] = luval[colptrs[jlus]] - mult[0]*luval[inds];
220  }
221 
222  }
223 
224  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
225 
226  colptrs[k] = 0;
227  luval[k] = 1.0/luval[k];
228  }
229 
230  break;
231 
232  case 3:
233 
234  for (k = 0; k < n; ++k) {
235 
236  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
237  colptrs[jlu[indj]] = indj;
238  ibstart=indj*nb2;
239  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
240  }
241 
242  colptrs[k] = k;
243 
244  for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
245  ijaj = A->JA[indja];
246  ibstart=colptrs[ijaj]*nb2;
247  ibstart1=indja*nb2;
248  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
249  }
250 
251  for (indj = jlu[k]; indj < uptr[k]; ++indj) {
252  jluj = jlu[indj];
253 
254  ibstart=indj*nb2;
255  fasp_blas_smat_mul_nc3(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
256  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
257 
258  for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
259  jlus = jlu[inds];
260  if (colptrs[jlus] != 0) {
261  fasp_blas_smat_mul_nc3(mult,&(luval[inds*nb2]),mult1);
262  ibstart=colptrs[jlus]*nb2;
263  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
264  }
265  }
266 
267  }
268 
269  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
270 
271  colptrs[k] = 0;
272 
273  fasp_blas_smat_inv_nc3(&(luval[k*nb2]));
274 
275  }
276 
277  break;
278 
279  case 5:
280 
281  for (k = 0; k < n; ++k) {
282 
283  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
284  colptrs[jlu[indj]] = indj;
285  ibstart=indj*nb2;
286  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
287  }
288 
289  colptrs[k] = k;
290 
291  for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
292  ijaj = A->JA[indja];
293  ibstart=colptrs[ijaj]*nb2;
294  ibstart1=indja*nb2;
295  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
296  }
297 
298  for (indj = jlu[k]; indj < uptr[k]; ++indj) {
299  jluj = jlu[indj];
300 
301  ibstart=indj*nb2;
302  fasp_blas_smat_mul_nc5(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
303  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
304 
305  for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
306  jlus = jlu[inds];
307  if (colptrs[jlus] != 0) {
308  fasp_blas_smat_mul_nc5(mult,&(luval[inds*nb2]),mult1);
309  ibstart=colptrs[jlus]*nb2;
310  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
311  }
312  }
313 
314  }
315 
316  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
317 
318  colptrs[k] = 0;
319 
320  fasp_blas_smat_inv_nc5(&(luval[k*nb2]));
321  }
322 
323  break;
324 
325  case 7:
326 
327  for (k = 0; k < n; ++k) {
328 
329  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) {
330  colptrs[jlu[indj]] = indj;
331  ibstart=indj*nb2;
332  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
333  }
334 
335  colptrs[k] = k;
336 
337  for (indja = A->IA[k]; indja < A->IA[k+1]; ++indja) {
338  ijaj = A->JA[indja];
339  ibstart=colptrs[ijaj]*nb2;
340  ibstart1=indja*nb2;
341  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
342  }
343 
344  for (indj = jlu[k]; indj < uptr[k]; ++indj) {
345  jluj = jlu[indj];
346 
347  ibstart=indj*nb2;
348  fasp_blas_smat_mul_nc7(&(luval[ibstart]),&(luval[jluj*nb2]),mult);
349  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
350 
351  for (inds = uptr[jluj]; inds < jlu[jluj+1]; ++inds) {
352  jlus = jlu[inds];
353  if (colptrs[jlus] != 0) {
354  fasp_blas_smat_mul_nc7(mult,&(luval[inds*nb2]),mult1);
355  ibstart=colptrs[jlus]*nb2;
356  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
357  }
358  }
359 
360  }
361 
362  for (indj = jlu[k]; indj < jlu[k+1]; ++indj) colptrs[jlu[indj]] = 0;
363 
364  colptrs[k] = 0;
365 
366  fasp_blas_smat_inv(&(luval[k*nb2]),nb);
367  }
368 
369  break;
370 
371  default:
372 
373  for (k=0;k<n;k++) {
374 
375  for (indj = jlu[k];indj<jlu[k+1];++indj) {
376  colptrs[jlu[indj]] = indj;
377  ibstart=indj*nb2;
378  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = 0;
379  }
380 
381  colptrs[k] = k;
382 
383  for (indja = A->IA[k]; indja < A->IA[k+1];indja++) {
384  ijaj = A->JA[indja];
385  ibstart=colptrs[ijaj]*nb2;
386  ibstart1=indja*nb2;
387  for (ib=0;ib<nb2;++ib) luval[ibstart+ib] = A->val[ibstart1+ib];
388  }
389 
390  for (indj = jlu[k]; indj < uptr[k]; ++indj) {
391  jluj = jlu[indj];
392 
393  ibstart=indj*nb2;
394  fasp_blas_smat_mul(&(luval[ibstart]),&(luval[jluj*nb2]),mult,nb);
395  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]=mult[ib];
396 
397  for (inds = uptr[jluj]; inds < jlu[jluj+1]; inds++) {
398  jlus = jlu[inds];
399  if (colptrs[jlus] != 0) {
400  fasp_blas_smat_mul(mult,&(luval[inds*nb2]),mult1,nb);
401  ibstart=colptrs[jlus]*nb2;
402  for (ib=0;ib<nb2;++ib) luval[ibstart+ib]-=mult1[ib];
403  }
404  }
405 
406  }
407 
408  for (indj = jlu[k];indj<jlu[k+1];++indj)
409  colptrs[jlu[indj]] = 0;
410 
411  colptrs[k] = 0;
412 
413  fasp_blas_smat_inv(&(luval[k*nb2]),nb);
414 
415  }
416  }
417 
418  fasp_mem_free(colptrs);
419  fasp_mem_free(mult);
420  fasp_mem_free(mult1);
421 
422  return status;
423 }
424 
425 /*---------------------------------*/
426 /*-- End of File --*/
427 /*---------------------------------*/
INT col
column of matrix LU, n
Definition: fasp.h:406
INT * ijlu
integer array of row pointers and column indexes, the size is nzlu
Definition: fasp.h:412
#define REAL
Definition: fasp.h:67
INT fasp_blas_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
Definition: smat.c:554
INT nb
dimension of each sub-block
Definition: fasp_block.h:56
void fasp_blas_smat_mul_nc5(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: blas_smat.c:299
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:44
INT ILU_lfil
level of fill-in for ILUk
Definition: fasp.h:383
void * fasp_mem_calloc(LONGLONG size, INT type)
1M = 1024*1024
Definition: memory.c:60
void fasp_blas_smat_mul(REAL *a, REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: blas_smat.c:448
#define INT
Definition: fasp.h:64
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:27
Data for ILU setup.
Definition: fasp.h:400
void fasp_blas_smat_mul_nc3(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: blas_smat.c:262
INT nwork
work space size
Definition: fasp.h:421
void fasp_mem_free(void *mem)
Free up previous allocated memory body.
Definition: memory.c:150
SHORT fasp_ilu_dbsr_setup(dBSRmat *A, ILU_data *iludata, ILU_param *iluparam)
Get ILU decoposition of a BSR matrix A.
Definition: ilu_setup_bsr.c:45
REAL * val
Definition: fasp_block.h:67
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
Definition: fasp_block.h:53
REAL * luval
nonzero entries of LU
Definition: fasp.h:415
#define ERROR_SOLVER_ILUSETUP
Definition: fasp_const.h:52
Main header file for FASP.
void fasp_gettime(REAL *time)
Get system time.
Definition: timing.c:28
INT row
row number of matrix LU, m
Definition: fasp.h:403
#define PRINT_NONE
Print level for all subroutines – not including DEBUG output.
Definition: fasp_const.h:79
void fasp_blas_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
Definition: smat.c:173
INT COL
number of cols of sub-blocks in matrix A, N
Definition: fasp_block.h:50
void fasp_blas_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
Definition: smat.c:61
void fasp_blas_smat_mul_nc7(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: blas_smat.c:358
Parameters for ILU.
Definition: fasp.h:374
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:47
INT nb
block size for BSR type only
Definition: fasp.h:418
INT * JA
Definition: fasp_block.h:74
REAL * work
work space
Definition: fasp.h:424
INT nzlu
number of nonzero entries
Definition: fasp.h:409
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:70
SHORT print_level
print level
Definition: fasp.h:377