Fast Auxiliary Space Preconditioning  1.8.4 Feb/15/2016
sparse_bsr.c
Go to the documentation of this file.
1 
6 #ifdef _OPENMP
7 #include <omp.h>
8 #endif
9 
10 #include <math.h>
11 
12 #include "fasp.h"
13 #include "fasp_functs.h"
14 
15 /*---------------------------------*/
16 /*-- Public Functions --*/
17 /*---------------------------------*/
18 
37  const INT COL,
38  const INT NNZ,
39  const INT nb,
40  const INT storage_manner)
41 {
42  dBSRmat A;
43 
44  if ( ROW > 0 ) {
45  A.IA = (INT*)fasp_mem_calloc(ROW+1, sizeof(INT));
46  }
47  else {
48  A.IA = NULL;
49  }
50 
51  if ( NNZ > 0 ) {
52  A.JA = (INT*)fasp_mem_calloc(NNZ ,sizeof(INT));
53  }
54  else {
55  A.JA = NULL;
56  }
57 
58  if ( nb > 0 && NNZ > 0) {
59  A.val = (REAL*)fasp_mem_calloc(NNZ*nb*nb, sizeof(REAL));
60  }
61  else {
62  A.val = NULL;
63  }
64 
65  A.ROW = ROW; A.COL = COL; A.NNZ = NNZ;
66  A.nb = nb; A.storage_manner = storage_manner;
67 
68  return A;
69 }
70 
87 void fasp_dbsr_alloc (const INT ROW,
88  const INT COL,
89  const INT NNZ,
90  const INT nb,
91  const INT storage_manner,
92  dBSRmat *A)
93 {
94  if ( ROW > 0 ) {
95  A->IA = (INT*)fasp_mem_calloc(ROW+1, sizeof(INT));
96  }
97  else {
98  A->IA = NULL;
99  }
100 
101  if ( NNZ > 0 ) {
102  A->JA = (INT*)fasp_mem_calloc(NNZ, sizeof(INT));
103  }
104  else {
105  A->JA = NULL;
106  }
107 
108  if ( nb > 0 ) {
109  A->val = (REAL*)fasp_mem_calloc(NNZ*nb*nb, sizeof(REAL));
110  // A->val = (REAL*)malloc(NNZ*nb*nb*sizeof(REAL));
111  }
112  else {
113  A->val = NULL;
114  }
115 
116  A->ROW = ROW; A->COL = COL; A->NNZ = NNZ;
117  A->nb = nb; A->storage_manner = storage_manner;
118 
119  return;
120 }
121 
122 
134 {
135  if (A==NULL) return;
136 
137  fasp_mem_free(A->IA);
138  fasp_mem_free(A->JA);
139  fasp_mem_free(A->val);
140 
141  A->ROW = 0;
142  A->COL = 0;
143  A->NNZ = 0;
144  A->nb = 0;
145  A->storage_manner = 0;
146 }
147 
159 {
160  A->ROW=0;
161  A->COL=0;
162  A->NNZ=0;
163  A->nb=0;
164  A->storage_manner=0;
165  A->IA=NULL;
166  A->JA=NULL;
167  A->val=NULL;
168 }
169 
182  dBSRmat *B)
183 {
184  B->ROW=A->ROW;
185  B->COL=A->COL;
186  B->NNZ=A->NNZ;
187  B->nb = A->nb;
189 
190  memcpy(B->IA,A->IA,(A->ROW+1)*sizeof(INT));
191  memcpy(B->JA,A->JA,(A->NNZ)*sizeof(INT));
192  memcpy(B->val,A->val,(A->NNZ)*(A->nb)*(A->nb)*sizeof(REAL));
193 }
194 
209  dBSRmat *AT)
210 {
211  const INT n=A->ROW, m=A->COL, nnz=A->NNZ, nb=A->nb;
212  INT i,j,k,p,inb,jnb,nb2;
213  INT status = FASP_SUCCESS;
214 
215  AT->ROW=m;
216  AT->COL=n;
217  AT->NNZ=nnz;
218  AT->nb = nb;
220 
221  AT->IA=(INT*)fasp_mem_calloc(m+1,sizeof(INT));
222  nb2 = A->nb*A->nb;
223 
224  AT->JA=(INT*)fasp_mem_calloc(nnz,sizeof(INT));
225 
226  if (A->val) {
227  AT->val=(REAL*)fasp_mem_calloc(nnz*nb*nb,sizeof(REAL));
228  }
229  else {
230  AT->val=NULL;
231  }
232 
233  // first pass: find the number of nonzeros in the first m-1 columns of A
234  // Note: these numbers are stored in the array AT.IA from 1 to m-1
235  fasp_iarray_set(m+1, AT->IA, 0);
236 
237  for (j=0;j<nnz;++j) {
238  i=A->JA[j]; // column number of A = row number of A'
239  if (i<m-1) AT->IA[i+2]++;
240  }
241 
242  for (i=2;i<=m;++i) AT->IA[i]+=AT->IA[i-1];
243 
244  // second pass: form A'
245  if (A->val) {
246  for (i=0;i<n;++i) {
247  INT ibegin=A->IA[i], iend1=A->IA[i+1];
248  for (p=ibegin;p<iend1;p++) {
249  j=A->JA[p]+1;
250  k=AT->IA[j];
251  AT->JA[k]=i;
252  for (inb=0;inb<nb;inb++)
253  for (jnb=0;jnb<nb;jnb++)
254  AT->val[nb2*k + inb*nb + jnb] = A->val[nb2*p + jnb*nb + inb];
255  AT->IA[j]=k+1;
256  } // end for p
257  } // end for i
258 
259  }
260  else {
261  for (i=0;i<n;++i) {
262  INT ibegin=A->IA[i], iend1=A->IA[i+1];
263  for (p=ibegin;p<iend1;p++) {
264  j=A->JA[p]+1;
265  k=AT->IA[j];
266  AT->JA[k]=i;
267  AT->IA[j]=k+1;
268  } // end for p
269  } // end of i
270 
271  } // end if
272 
273  return (status);
274 }
275 
293 {
294  SHORT status = FASP_SUCCESS;
295  const INT num_rowsA = A -> ROW;
296  const INT num_colsA = A -> COL;
297  const INT nb = A->nb;
298  const INT nb2 = nb*nb;
299  const INT *A_i = A -> IA;
300  INT *A_j = A -> JA;
301  REAL *A_data = A -> val;
302 
303  INT i, j, tempi, row_size;
304 
305 #ifdef _OPENMP
306  // variables for OpenMP
307  INT myid, mybegin, myend, ibegin, iend;
308  INT nthreads = FASP_GET_NUM_THREADS();
309 #endif
310 
311  /* the matrix should be square */
312  if (num_rowsA != num_colsA) return ERROR_INPUT_PAR;
313 
314 #ifdef _OPENMP
315  if (num_rowsA > OPENMP_HOLDS) {
316  REAL *tempd = (REAL*)fasp_mem_calloc(nb2*nthreads, sizeof(REAL));
317 #pragma omp parallel for private (myid,mybegin,myend,i,j,tempi,ibegin,iend)
318  for (myid = 0; myid < nthreads; myid++) {
319  FASP_GET_START_END(myid, nthreads, num_rowsA, &mybegin, &myend);
320  for (i = mybegin; i < myend; i++) {
321  ibegin = A_i[i+1]; iend = A_i[i];
322  for (j = ibegin; j < iend; j ++) {
323  if (A_j[j] == i) {
324  if (j != ibegin) {
325  // swap index
326  tempi = A_j[ibegin];
327  A_j[ibegin] = A_j[j];
328  A_j[j] = tempi;
329  // swap block
330  memcpy(tempd+myid*nb2, A_data+ibegin*nb2, (nb2)*sizeof(REAL));
331  memcpy(A_data+ibegin*nb2, A_data+j*nb2, (nb2)*sizeof(REAL));
332  memcpy(A_data+j*nb2, tempd+myid*nb2, (nb2)*sizeof(REAL));
333  }
334  break;
335  }
336  /* diagonal element is missing */
337  if (j == iend-1) {
338  status = -2;
339  break;
340  }
341  }
342  }
343  }
344  fasp_mem_free(tempd);
345  }
346  else {
347 #endif
348  REAL *tempd = (REAL*)fasp_mem_calloc(nb2, sizeof(REAL));
349  for (i = 0; i < num_rowsA; i ++) {
350  row_size = A_i[i+1] - A_i[i];
351  for (j = 0; j < row_size; j ++) {
352  if (A_j[j] == i) {
353  if (j != 0) {
354  // swap index
355  tempi = A_j[0];
356  A_j[0] = A_j[j];
357  A_j[j] = tempi;
358  // swap block
359  memcpy(tempd, A_data, (nb2)*sizeof(REAL));
360  memcpy(A_data, A_data+j*nb2, (nb2)*sizeof(REAL));
361  memcpy(A_data+j*nb2, tempd, (nb2)*sizeof(REAL));
362  }
363  break;
364  }
365  /* diagonal element is missing */
366  if (j == row_size-1) return -2;
367  }
368  A_j += row_size;
369  A_data += row_size*nb2;
370  }
371  fasp_mem_free(tempd);
372 #ifdef _OPENMP
373  }
374 #endif
375 
376  if (status < 0) return status;
377  else return FASP_SUCCESS;
378 }
379 
393 {
394  // members of A
395  const INT ROW = A->ROW;
396  const INT nb = A->nb;
397  const INT nb2 = nb*nb;
398  const INT size = ROW*nb2;
399  const INT *IA = A->IA;
400  const INT *JA = A->JA;
401  REAL *val = A->val;
402 
403  dvector diaginv;
404 
405  INT i,k;
406 
407  // Variables for OpenMP
408  INT nthreads = 1, use_openmp = FALSE;
409  INT myid, mybegin, myend;
410 
411 #ifdef _OPENMP
412  if (ROW > OPENMP_HOLDS) {
413  use_openmp = TRUE;
414  nthreads = FASP_GET_NUM_THREADS();
415  }
416 #endif
417 
418  // allocate memory
419  diaginv.row = size;
420  diaginv.val = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
421 
422  // get all the diagonal sub-blocks
423  if (use_openmp) {
424 #ifdef _OPENMP
425 #pragma omp parallel for private(myid, i, mybegin, myend, k)
426 #endif
427  for (myid = 0; myid < nthreads; myid++) {
428  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
429  for (i = mybegin; i < myend; ++i) {
430  for (k = IA[i]; k < IA[i+1]; ++k) {
431  if (JA[k] == i)
432  memcpy(diaginv.val+i*nb2, val+k*nb2, nb2*sizeof(REAL));
433  }
434  }
435  }
436  }
437  else {
438  for (i = 0; i < ROW; ++i) {
439  for (k = IA[i]; k < IA[i+1]; ++k) {
440  if (JA[k] == i)
441  memcpy(diaginv.val+i*nb2, val+k*nb2, nb2*sizeof(REAL));
442  }
443  }
444  }
445  // compute the inverses of all the diagonal sub-blocks
446  if (use_openmp) {
447 #ifdef _OPENMP
448 #pragma omp parallel for private(myid, i, mybegin, myend)
449 #endif
450  for (myid = 0; myid < nthreads; myid++) {
451  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
452  if (nb > 1) {
453  for (i = mybegin; i < myend; ++i) {
454  fasp_blas_smat_inv(diaginv.val+i*nb2, nb);
455  }
456  }
457  else {
458  for (i = mybegin; i < myend; ++i) {
459  // zero-diagonal should be tested previously
460  diaginv.val[i] = 1.0 / diaginv.val[i];
461  }
462  }
463  }
464  }
465  else {
466  if (nb > 1) {
467  for (i = 0; i < ROW; ++i) {
468  fasp_blas_smat_inv(diaginv.val+i*nb2, nb);
469  }
470  }
471  else {
472  for (i = 0; i < ROW; ++i) {
473  // zero-diagonal should be tested previously
474  diaginv.val[i] = 1.0 / diaginv.val[i];
475  }
476  }
477  }
478 
479  return (diaginv);
480 }
481 
497 {
498  // members of A
499  const INT ROW = A->ROW;
500  const INT COL = A->COL;
501  const INT NNZ = A->NNZ;
502  const INT nb = A->nb;
503  const INT nb2 = nb*nb;
504  const INT size = ROW*nb2;
505  const INT *IA = A->IA;
506  const INT *JA = A->JA;
507  REAL *val = A->val;
508 
509  dBSRmat B;
510  INT *IAb = NULL;
511  INT *JAb = NULL;
512  REAL *valb = NULL;
513 
514  // Create a dBSRmat 'B'
515  B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
516 
517  REAL *diaginv = NULL;
518 
519  INT i,j,k,m,l;
520 
521  // Variables for OpenMP
522  INT nthreads = 1, use_openmp = FALSE;
523  INT myid, mybegin, myend;
524 
525 #ifdef _OPENMP
526  if (ROW > OPENMP_HOLDS) {
527  use_openmp = TRUE;
528  nthreads = FASP_GET_NUM_THREADS();
529  }
530 #endif
531 
532  IAb = B.IA;
533  JAb = B.JA;
534  valb = B.val;
535 
536  memcpy(IAb, IA, (ROW+1)*sizeof(INT));
537  memcpy(JAb, JA, NNZ*sizeof(INT));
538 
539  // allocate memory
540  diaginv = (REAL *)fasp_mem_calloc(size, sizeof(REAL));
541 
542  // get all the diagonal sub-blocks
543  if (use_openmp) {
544 #ifdef _OPENMP
545 #pragma omp parallel for private(myid, i, mybegin, myend, k)
546 #endif
547  for (myid = 0; myid < nthreads; myid++) {
548  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
549  for (i = mybegin; i < myend; ++i) {
550  for (k = IA[i]; k < IA[i+1]; ++k) {
551  if (JA[k] == i)
552  memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
553  }
554  }
555  }
556  }
557  else {
558  for (i = 0; i < ROW; ++i) {
559  for (k = IA[i]; k < IA[i+1]; ++k) {
560  if (JA[k] == i)
561  memcpy(diaginv+i*nb2, val+k*nb2, nb2*sizeof(REAL));
562  }
563  }
564  }
565 
566  // compute the inverses of all the diagonal sub-blocks
567  if (use_openmp) {
568 #ifdef _OPENMP
569 #pragma omp parallel for private(myid, i, mybegin, myend)
570 #endif
571  for (myid = 0; myid < nthreads; myid++) {
572  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
573  if (nb > 1) {
574  for (i = mybegin; i < myend; ++i) {
575  fasp_blas_smat_inv(diaginv+i*nb2, nb);
576  }
577  }
578  else {
579  for (i = mybegin; i < myend; ++i) {
580  // zero-diagonal should be tested previously
581  diaginv[i] = 1.0 / diaginv[i];
582  }
583  }
584  }
585  }
586  else {
587  if (nb > 1) {
588  for (i = 0; i < ROW; ++i) {
589  fasp_blas_smat_inv(diaginv+i*nb2, nb);
590  }
591  }
592  else {
593  for (i = 0; i < ROW; ++i) {
594  // zero-diagonal should be tested previously
595  diaginv[i] = 1.0 / diaginv[i];
596  }
597  }
598  }
599 
600  // compute D^{-1}*A
601  if (use_openmp) {
602 #ifdef _OPENMP
603 #pragma omp parallel for private(myid, mybegin, myend, i, k, m, j, l)
604 #endif
605  for (myid = 0; myid < nthreads; myid++) {
606  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
607  for (i = mybegin; i < myend; ++i) {
608  for (k = IA[i]; k < IA[i+1]; ++k) {
609  m = k*nb2;
610  j = JA[k];
611  if (j == i) {
612  // Identity sub-block
613  memset(valb+m, 0X0, nb2*sizeof(REAL));
614  for (l = 0; l < nb; l ++) valb[m+l*nb+l] = 1.0;
615  }
616  else {
617  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
618  }
619  }
620  }
621  }
622  }
623  else {
624  for (i = 0; i < ROW; ++i) {
625  for (k = IA[i]; k < IA[i+1]; ++k) {
626  m = k*nb2;
627  j = JA[k];
628  if (j == i) {
629  // Identity sub-block
630  memset(valb+m, 0X0, nb2*sizeof(REAL));
631  for (l = 0; l < nb; l ++) valb[m+l*nb+l] = 1.0;
632  }
633  else {
634  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
635  }
636  }
637  }
638  }
639 
640  fasp_mem_free(diaginv);
641 
642  return (B);
643 }
644 
661  REAL *diaginv)
662 {
663  // members of A
664  const INT ROW = A->ROW;
665  const INT COL = A->COL;
666  const INT NNZ = A->NNZ;
667  const INT nb = A->nb, nbp1 = nb+1;
668  const INT nb2 = nb*nb;
669 
670  INT *IA = A->IA;
671  INT *JA = A->JA;
672  REAL *val = A->val;
673 
674  dBSRmat B;
675  INT *IAb = NULL;
676  INT *JAb = NULL;
677  REAL *valb = NULL;
678 
679  INT i,k,m,l,ibegin,iend;
680 
681  // Variables for OpenMP
682  INT nthreads = 1, use_openmp = FALSE;
683  INT myid, mybegin, myend;
684 
685 #ifdef _OPENMP
686  if (ROW > OPENMP_HOLDS) {
687  use_openmp = TRUE;
688  nthreads = FASP_GET_NUM_THREADS();
689  }
690 #endif
691 
692  // Create a dBSRmat 'B'
693  B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
694  IAb = B.IA;
695  JAb = B.JA;
696  valb = B.val;
697 
698  memcpy(IAb, IA, (ROW+1)*sizeof(INT));
699  memcpy(JAb, JA, NNZ*sizeof(INT));
700 
701  // compute D^{-1}*A
702  if (use_openmp) {
703 #ifdef _OPENMP
704 #pragma omp parallel for private (myid, i, mybegin, myend, ibegin, iend, k, m, l)
705 #endif
706  for (myid = 0; myid < nthreads; myid++) {
707  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
708  for (i = mybegin; i < myend; ++i) {
709  ibegin = IA[i]; iend = IA[i+1];
710  for (k = ibegin; k < iend; ++k) {
711  m = k*nb2;
712  if (JA[k] != i) {
713  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
714  }
715  else {
716  // Identity sub-block
717  memset(valb+m, 0X0, nb2*sizeof(REAL));
718  for (l = 0; l < nb; l ++) valb[m+l*nbp1] = 1.0;
719  }
720  }
721  }
722  }
723  }
724  else {
725  // compute D^{-1}*A
726  for (i = 0; i < ROW; ++i) {
727  ibegin = IA[i]; iend = IA[i+1];
728  for (k = ibegin; k < iend; ++k) {
729  m = k*nb2;
730  if (JA[k] != i) {
731  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
732  }
733  else {
734  // Identity sub-block
735  memset(valb+m, 0X0, nb2*sizeof(REAL));
736  for (l = 0; l < nb; l ++) valb[m+l*nbp1] = 1.0;
737  }
738  }
739  }
740  }
741 
742  return (B);
743 }
744 
763  REAL *diaginv)
764 {
765  dBSRmat B;
766  // members of A
767  INT ROW = A->ROW;
768  INT ROW_plus_one = ROW+1;
769  INT COL = A->COL;
770  INT NNZ = A->NNZ;
771  INT nb = A->nb;
772  INT *IA = A->IA;
773  INT *JA = A->JA;
774  REAL *val = A->val;
775 
776  INT *IAb = NULL;
777  INT *JAb = NULL;
778  REAL *valb = NULL;
779 
780  INT nb2 = nb*nb;
781  INT i,j,k,m;
782 
783  INT use_openmp = FALSE;
784 
785 #ifdef _OPENMP
786  INT myid, mybegin, myend, stride_i, nthreads;
787  if ( ROW > OPENMP_HOLDS ) {
788  use_openmp = TRUE;
789  nthreads = FASP_GET_NUM_THREADS();
790  }
791 #endif
792 
793  // Create a dBSRmat 'B'
794  B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
795 
796  IAb = B.IA;
797  JAb = B.JA;
798  valb = B.val;
799 
800  fasp_iarray_cp(ROW_plus_one, IA, IAb);
801  fasp_iarray_cp(NNZ, JA, JAb);
802 
803  switch (nb) {
804 
805  case 2:
806  // main loop
807  if (use_openmp) {
808 #ifdef _OPENMP
809  stride_i = ROW/nthreads;
810 #pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
811  {
812  myid = omp_get_thread_num();
813  mybegin = myid*stride_i;
814  if (myid < nthreads-1) myend = mybegin+stride_i;
815  else myend = ROW;
816  for (i=mybegin; i < myend; ++i) {
817  // get the diagonal sub-blocks
818 
819  k = IA[i];
820  m = k*4;
821  memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
822  fasp_smat_identity_nc2(valb+m);
823 
824  // compute the inverses of the diagonal sub-blocks
825  fasp_blas_smat_inv_nc2(diaginv+i*4);
826  // compute D^{-1}*A
827  for (k = IA[i]+1; k < IA[i+1]; ++k)
828  {
829  m = k*4;
830  j = JA[k];
831  fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
832  }
833  }// end of main loop
834  }
835 #endif
836  }
837  else {
838  // main loop
839  for (i = 0; i < ROW; ++i) {
840  // get the diagonal sub-blocks
841  k = IA[i];
842  m = k*4;
843  memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
844  fasp_smat_identity_nc2(valb+m);
845 
846  // compute the inverses of the diagonal sub-blocks
847  fasp_blas_smat_inv_nc2(diaginv+i*4);
848  // compute D^{-1}*A
849  for (k = IA[i]+1; k < IA[i+1]; ++k) {
850  m = k*4;
851  j = JA[k];
852  fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
853  }
854  }// end of main loop
855  }
856 
857  break;
858 
859  case 3:
860  // main loop
861  if (use_openmp) {
862 #ifdef _OPENMP
863  stride_i = ROW/nthreads;
864 #pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
865  {
866  myid = omp_get_thread_num();
867  mybegin = myid*stride_i;
868  if (myid < nthreads-1) myend = mybegin+stride_i;
869  else myend = ROW;
870  for (i=mybegin; i < myend; ++i) {
871  // get the diagonal sub-blocks
872  for (k = IA[i]; k < IA[i+1]; ++k) {
873  if (JA[k] == i) {
874  m = k*9;
875  memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
876  fasp_smat_identity_nc3(valb+m);
877  }
878  }
879  // compute the inverses of the diagonal sub-blocks
880  fasp_blas_smat_inv_nc3(diaginv+i*9);
881  // compute D^{-1}*A
882  for (k = IA[i]; k < IA[i+1]; ++k) {
883  m = k*9;
884  j = JA[k];
885  if (j != i) fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
886  }
887  }// end of main loop
888  }
889 #endif
890  }
891 
892  else {
893  for (i = 0; i < ROW; ++i) {
894  // get the diagonal sub-blocks
895  for (k = IA[i]; k < IA[i+1]; ++k) {
896  if (JA[k] == i) {
897  m = k*9;
898  memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
899  fasp_smat_identity_nc3(valb+m);
900  }
901  }
902 
903  // compute the inverses of the diagonal sub-blocks
904  fasp_blas_smat_inv_nc3(diaginv+i*9);
905 
906  // compute D^{-1}*A
907  for (k = IA[i]; k < IA[i+1]; ++k) {
908  m = k*9;
909  j = JA[k];
910  if (j != i) fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
911  }
912  }// end of main loop
913  }
914 
915  break;
916 
917  case 5:
918  // main loop
919  if (use_openmp) {
920 #ifdef _OPENMP
921  stride_i = ROW/nthreads;
922 #pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
923  {
924  myid = omp_get_thread_num();
925  mybegin = myid*stride_i;
926  if (myid < nthreads-1) myend = mybegin+stride_i;
927  else myend = ROW;
928  for (i=mybegin; i < myend; ++i) {
929  // get the diagonal sub-blocks
930  for (k = IA[i]; k < IA[i+1]; ++k) {
931  if (JA[k] == i) {
932  m = k*25;
933  memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
934  fasp_smat_identity_nc5(valb+m);
935  }
936  }
937 
938  // compute the inverses of the diagonal sub-blocks
939  fasp_blas_smat_inv_nc5(diaginv+i*25);
940 
941  // compute D^{-1}*A
942  for (k = IA[i]; k < IA[i+1]; ++k) {
943  m = k*25;
944  j = JA[k];
945  if (j != i) fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
946  }
947  }// end of main loop
948  }
949 #endif
950  }
951 
952  else {
953  for (i = 0; i < ROW; ++i) {
954  // get the diagonal sub-blocks
955  for (k = IA[i]; k < IA[i+1]; ++k) {
956  if (JA[k] == i)
957  {
958  m = k*25;
959  memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
960  fasp_smat_identity_nc5(valb+m);
961  }
962  }
963 
964  // compute the inverses of the diagonal sub-blocks
965  fasp_blas_smat_inv_nc5(diaginv+i*25);
966 
967  // compute D^{-1}*A
968  for (k = IA[i]; k < IA[i+1]; ++k) {
969  m = k*25;
970  j = JA[k];
971  if (j != i) fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
972  }
973  }// end of main loop
974  }
975 
976  break;
977 
978  case 7:
979  // main loop
980  if (use_openmp) {
981 #ifdef _OPENMP
982  stride_i = ROW/nthreads;
983 #pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
984  {
985  myid = omp_get_thread_num();
986  mybegin = myid*stride_i;
987  if (myid < nthreads-1) myend = mybegin+stride_i;
988  else myend = ROW;
989  for (i=mybegin; i < myend; ++i) {
990  // get the diagonal sub-blocks
991  for (k = IA[i]; k < IA[i+1]; ++k) {
992  if (JA[k] == i) {
993  m = k*49;
994  memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
995  fasp_smat_identity_nc7(valb+m);
996  }
997  }
998 
999  // compute the inverses of the diagonal sub-blocks
1000  fasp_blas_smat_inv_nc7(diaginv+i*49);
1001 
1002  // compute D^{-1}*A
1003  for (k = IA[i]; k < IA[i+1]; ++k) {
1004  m = k*49;
1005  j = JA[k];
1006  if (j != i) fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1007  }
1008  }// end of main loop
1009  }
1010 #endif
1011  }
1012 
1013  else {
1014  for (i = 0; i < ROW; ++i) {
1015  // get the diagonal sub-blocks
1016  for (k = IA[i]; k < IA[i+1]; ++k) {
1017  if (JA[k] == i) {
1018  m = k*49;
1019  memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1020  fasp_smat_identity_nc7(valb+m);
1021  }
1022  }
1023 
1024  // compute the inverses of the diagonal sub-blocks
1025  fasp_blas_smat_inv_nc7(diaginv+i*49);
1026 
1027  // compute D^{-1}*A
1028  for (k = IA[i]; k < IA[i+1]; ++k) {
1029  m = k*49;
1030  j = JA[k];
1031  if (j != i) fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1032  }
1033  }// end of main loop
1034  }
1035 
1036  break;
1037 
1038  default:
1039  // main loop
1040  if (use_openmp) {
1041 #ifdef _OPENMP
1042  stride_i = ROW/nthreads;
1043 #pragma omp parallel private(myid, mybegin, myend,i,k,m,j) num_threads(nthreads)
1044  {
1045  myid = omp_get_thread_num();
1046  mybegin = myid*stride_i;
1047  if (myid < nthreads-1) myend = mybegin+stride_i;
1048  else myend = ROW;
1049  for (i=mybegin; i < myend; ++i) {
1050  // get the diagonal sub-blocks
1051  for (k = IA[i]; k < IA[i+1]; ++k) {
1052  if (JA[k] == i) {
1053  m = k*nb2;
1054  memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1055  fasp_smat_identity(valb+m, nb, nb2);
1056  }
1057  }
1058 
1059  // compute the inverses of the diagonal sub-blocks
1060  fasp_blas_smat_inv(diaginv+i*nb2, nb);
1061 
1062  // compute D^{-1}*A
1063  for (k = IA[i]; k < IA[i+1]; ++k) {
1064  m = k*nb2;
1065  j = JA[k];
1066  if (j != i) fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1067  }
1068  }// end of main loop
1069  }
1070 #endif
1071  }
1072  else {
1073  for (i = 0; i < ROW; ++i) {
1074  // get the diagonal sub-blocks
1075  for (k = IA[i]; k < IA[i+1]; ++k) {
1076  if (JA[k] == i) {
1077  m = k*nb2;
1078  memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1079  fasp_smat_identity(valb+m, nb, nb2);
1080  }
1081  }
1082 
1083  // compute the inverses of the diagonal sub-blocks
1084  fasp_blas_smat_inv(diaginv+i*nb2, nb);
1085 
1086  // compute D^{-1}*A
1087  for (k = IA[i]; k < IA[i+1]; ++k) {
1088  m = k*nb2;
1089  j = JA[k];
1090  if (j != i) fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1091  }
1092  } // end of main loop
1093  }
1094 
1095  break;
1096  }
1097 
1098  return (B);
1099 }
1100 
1121  REAL *diaginv)
1122 {
1123  // members of A
1124  const INT ROW = A->ROW;
1125  const INT COL = A->COL;
1126  const INT NNZ = A->NNZ;
1127  const INT nb = A->nb;
1128  const INT nb2 = nb*nb;
1129  const INT *IA = A->IA;
1130  const INT *JA = A->JA;
1131  REAL *val = A->val;
1132 
1133  dBSRmat B;
1134  INT *IAb = NULL;
1135  INT *JAb = NULL;
1136  REAL *valb = NULL;
1137 
1138  INT i,k,m;
1139  INT ibegin, iend;
1140 
1141  // Variables for OpenMP
1142  INT nthreads = 1, use_openmp = FALSE;
1143  INT myid, mybegin, myend;
1144 
1145 #ifdef _OPENMP
1146  if (ROW > OPENMP_HOLDS) {
1147  use_openmp = TRUE;
1148  nthreads = FASP_GET_NUM_THREADS();
1149  }
1150 #endif
1151 
1152  // Create a dBSRmat 'B'
1153  B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1154 
1155  IAb = B.IA;
1156  JAb = B.JA;
1157  valb = B.val;
1158 
1159  memcpy(IAb, IA, (ROW+1)*sizeof(INT));
1160  memcpy(JAb, JA, NNZ*sizeof(INT));
1161 
1162  switch (nb) {
1163 
1164  case 2:
1165  if (use_openmp) {
1166 #ifdef _openmp
1167 #pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1168 #endif
1169  for (myid = 0; myid < nthreads; myid++) {
1170  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
1171  for (i = mybegin; i < myend; ++i) {
1172  ibegin = IA[i]; iend = IA[i+1];
1173  // get the diagonal sub-blocks (It is the first block of each row)
1174  m = ibegin*4;
1175  memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
1176  fasp_smat_identity_nc2(valb+m);
1177 
1178  // compute the inverses of the diagonal sub-blocks
1179  fasp_blas_smat_inv_nc2(diaginv+i*4); // fixed by Zheng Li
1180 
1181  // compute D^{-1}*A
1182  for (k = ibegin+1; k < iend; ++k) {
1183  m = k*4;
1184  fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
1185  }
1186  }
1187  }// end of main loop
1188  }
1189  else {
1190  for (i = 0; i < ROW; ++i) {
1191  ibegin = IA[i]; iend = IA[i+1];
1192  // get the diagonal sub-blocks (It is the first block of each row)
1193  m = ibegin*4;
1194  memcpy(diaginv+i*4, val+m, 4*sizeof(REAL));
1195  fasp_smat_identity_nc2(valb+m);
1196 
1197  // compute the inverses of the diagonal sub-blocks
1198  fasp_blas_smat_inv_nc2(diaginv+i*4); // fixed by Zheng Li
1199 
1200  // compute D^{-1}*A
1201  for (k = ibegin+1; k < iend; ++k) {
1202  m = k*4;
1203  fasp_blas_smat_mul_nc2(diaginv+i*4, val+m, valb+m);
1204  }
1205  } // end of main loop
1206  }
1207 
1208  break;
1209 
1210  case 3:
1211  if (use_openmp) {
1212 #ifdef _openmp
1213 #pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1214 #endif
1215  for (myid = 0; myid < nthreads; myid++) {
1216  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
1217  for (i = mybegin; i < myend; ++i) {
1218  ibegin = IA[i]; iend = IA[i+1];
1219  // get the diagonal sub-blocks (It is the first block of each row)
1220  m = ibegin*9;
1221  memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
1222  fasp_smat_identity_nc3(valb+m);
1223  // compute the inverses of the diagonal sub-blocks
1224  fasp_blas_smat_inv_nc3(diaginv+i*9);
1225  // compute D^{-1}*A
1226  for (k = ibegin+1; k < iend; ++k) {
1227  m = k*9;
1228  fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
1229  }
1230  }
1231  }// end of main loop
1232  }
1233  else {
1234  for (i = 0; i < ROW; ++i) {
1235  ibegin = IA[i]; iend = IA[i+1];
1236  // get the diagonal sub-blocks (It is the first block of each row)
1237  m = ibegin*9;
1238  memcpy(diaginv+i*9, val+m, 9*sizeof(REAL));
1239  fasp_smat_identity_nc3(valb+m);
1240 
1241  // compute the inverses of the diagonal sub-blocks
1242  fasp_blas_smat_inv_nc3(diaginv+i*9);
1243 
1244  // compute D^{-1}*A
1245  for (k = ibegin+1; k < iend; ++k) {
1246  m = k*9;
1247  fasp_blas_smat_mul_nc3(diaginv+i*9, val+m, valb+m);
1248  }
1249  }// end of main loop
1250  }
1251 
1252  break;
1253 
1254  case 5:
1255  if (use_openmp) {
1256 #ifdef _OPENMP
1257 #pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1258 #endif
1259  for (myid = 0; myid < nthreads; myid++) {
1260  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
1261  for (i = mybegin; i < myend; ++i) {
1262  // get the diagonal sub-blocks
1263  ibegin = IA[i]; iend = IA[i+1];
1264  m = ibegin*25;
1265  memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1266  fasp_smat_identity_nc5(valb+m);
1267 
1268  // compute the inverses of the diagonal sub-blocks
1269  fasp_blas_smat_inv_nc5(diaginv+i*25);
1270 
1271  // compute D^{-1}*A
1272  for (k = ibegin+1; k < iend; ++k) {
1273  m = k*25;
1274  fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1275  }
1276  }
1277  }
1278  }
1279  else {
1280  for (i = 0; i < ROW; ++i) {
1281  // get the diagonal sub-blocks
1282  ibegin = IA[i]; iend = IA[i+1];
1283  m = ibegin*25;
1284  memcpy(diaginv+i*25, val+m, 25*sizeof(REAL));
1285  fasp_smat_identity_nc5(valb+m);
1286 
1287  // compute the inverses of the diagonal sub-blocks
1288  fasp_blas_smat_inv_nc5(diaginv+i*25);
1289 
1290  // compute D^{-1}*A
1291  for (k = ibegin+1; k < iend; ++k) {
1292  m = k*25;
1293  fasp_blas_smat_mul_nc5(diaginv+i*25, val+m, valb+m);
1294  }
1295  }// end of main loop
1296  }
1297  break;
1298 
1299  case 7:
1300  if (use_openmp) {
1301 #ifdef _OPENMP
1302 #pragma omp parallel for private(myid, i, mybegin, myend, ibegin, iend, m, k)
1303 #endif
1304  for (myid = 0; myid < nthreads; myid++) {
1305  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
1306  for (i = mybegin; i < myend; ++i) {
1307  // get the diagonal sub-blocks
1308  ibegin = IA[i]; iend = IA[i+1];
1309  m = ibegin*49;
1310  memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1311  fasp_smat_identity_nc7(valb+m);
1312 
1313  // compute the inverses of the diagonal sub-blocks
1314  fasp_blas_smat_inv_nc7(diaginv+i*49);
1315 
1316  // compute D^{-1}*A
1317  for (k = ibegin+1; k < iend; ++k) {
1318  m = k*49;
1319  fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1320  }
1321  }
1322  }// end of main loop
1323  }
1324  else {
1325  for (i = 0; i < ROW; ++i) {
1326  // get the diagonal sub-blocks
1327  ibegin = IA[i]; iend = IA[i+1];
1328  m = ibegin*49;
1329  memcpy(diaginv+i*49, val+m, 49*sizeof(REAL));
1330  fasp_smat_identity_nc7(valb+m);
1331 
1332  // compute the inverses of the diagonal sub-blocks
1333  fasp_blas_smat_inv_nc7(diaginv+i*49);
1334 
1335  // compute D^{-1}*A
1336  for (k = ibegin+1; k < iend; ++k) {
1337  m = k*49;
1338  fasp_blas_smat_mul_nc7(diaginv+i*49, val+m, valb+m);
1339  }
1340  }// end of main loop
1341  }
1342 
1343  break;
1344 
1345  default:
1346  if (use_openmp) {
1347 #ifdef _OPENMP
1348 #pragma omp parallel for private(myid, mybegin, myend, i, ibegin, iend, m, k)
1349 #endif
1350  for (myid = 0; myid < nthreads; myid++) {
1351  FASP_GET_START_END(myid, nthreads, ROW, &mybegin, &myend);
1352  for (i = mybegin; i < myend; ++i) {
1353  // get the diagonal sub-blocks
1354  ibegin = IA[i]; iend = IA[i+1];
1355  m = ibegin*nb2;
1356  memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1357  fasp_smat_identity(valb+m, nb, nb2);
1358 
1359  // compute the inverses of the diagonal sub-blocks
1360  fasp_blas_smat_inv(diaginv+i*nb2, nb);
1361 
1362  // compute D^{-1}*A
1363  for (k = ibegin+1; k < iend; ++k) {
1364  m = k*nb2;
1365  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1366  }
1367  }
1368  }// end of main loop
1369  }
1370  else {
1371  for (i = 0; i < ROW; ++i) {
1372  // get the diagonal sub-blocks
1373  ibegin = IA[i]; iend = IA[i+1];
1374  m = ibegin*nb2;
1375  memcpy(diaginv+i*nb2, val+m, nb2*sizeof(REAL));
1376  fasp_smat_identity(valb+m, nb, nb2);
1377 
1378  // compute the inverses of the diagonal sub-blocks
1379  fasp_blas_smat_inv(diaginv+i*nb2, nb);
1380 
1381  // compute D^{-1}*A
1382  for (k = ibegin+1; k < iend; ++k) {
1383  m = k*nb2;
1384  fasp_blas_smat_mul(diaginv+i*nb2, val+m, valb+m, nb);
1385  }
1386  } // end of main loop
1387  }
1388 
1389  break;
1390  }
1391 
1392  return (B);
1393 }
1394 
1412  dBSRmat *A,
1413  REAL *diag )
1414 {
1415  const INT nb2 = A->nb*A->nb;
1416 
1417  INT i,k;
1418 
1419  if ( n==0 || n>A->ROW || n>A->COL ) n = MIN(A->ROW,A->COL);
1420 
1421 #ifdef _OPENMP
1422 #pragma omp parallel for private(i,k) if(n>OPENMP_HOLDS)
1423 #endif
1424  for (i = 0; i < n; ++i) {
1425  for (k = A->IA[i]; k < A->IA[i+1]; ++k) {
1426  if (A->JA[k] == i) {
1427  memcpy(diag+i*nb2, A->val+k*nb2, nb2*sizeof(REAL));
1428  break;
1429  }
1430  }
1431  }
1432 }
1433 
1450  REAL *DL,
1451  REAL *DU)
1452 {
1453 
1454  // members of A
1455  INT ROW = A->ROW;
1456  INT ROW_plus_one = ROW+1;
1457  INT COL = A->COL;
1458  INT NNZ = A->NNZ;
1459  INT nb = A->nb;
1460  INT *IA = A->IA;
1461  INT *JA = A->JA;
1462  REAL *val = A->val;
1463 
1464  INT *IAb = NULL;
1465  INT *JAb = NULL;
1466  REAL *valb = NULL;
1467 
1468  INT nb2 = nb*nb;
1469  INT i, j, k;
1470 
1471  // Create a dBSRmat 'B'
1472  dBSRmat B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1473 
1474  IAb = B.IA;
1475  JAb = B.JA;
1476  valb = B.val;
1477 
1478  fasp_iarray_cp(ROW_plus_one, IA, IAb);
1479  fasp_iarray_cp(NNZ, JA, JAb);
1480 
1481  // work array
1482  REAL *temp = (REAL *)fasp_mem_calloc(nb2, sizeof(REAL));
1483 
1484  // get DL and DU
1485  switch (nb) {
1486 
1487  case 2:
1488 
1489  for (i=0; i<ROW; i++){
1490 
1491  for (j=IA[i]; j<IA[i+1]; j++){
1492 
1493  if (JA[j] == i){
1494 
1495  temp[0] = val[j*nb2];
1496  temp[1] = val[j*nb2+1];
1497  temp[2] = val[j*nb2+2];
1498  temp[3] = val[j*nb2+3];
1499 
1500  // form DL
1501  DL[i*nb2] = 1.0;
1502  DL[i*nb2+1] = 0.0;
1503  DL[i*nb2+2] = -temp[2]/temp[0];
1504  DL[i*nb2+3] = 1.0;
1505  //DL[i*nb2+2] = -temp[2]/(temp[0]*s);
1506  //DL[i*nb2+3] = 1.0/s;
1507 
1508  // form DU
1509  DU[i*nb2] = 1.0;
1510  DU[i*nb2+1] = -temp[1]/temp[0];
1511  DU[i*nb2+2] = 0.0;
1512  DU[i*nb2+3] = 1.0;
1513 
1514  break;
1515 
1516  } // end of if (JA[j] == i)
1517 
1518  } // end of for (j=IA[i]; j<IA[i+1]; j++)
1519 
1520  } // end of for (i=0; i<ROW; i++)
1521 
1522  break;
1523 
1524  case 3:
1525 
1526  for (i=0; i<ROW; i++){
1527 
1528  for (j=IA[i]; j<IA[i+1]; j++){
1529 
1530  if (JA[j] == i){
1531 
1532  temp[0] = val[j*nb2];
1533  temp[1] = val[j*nb2+1];
1534  temp[2] = val[j*nb2+2];
1535  temp[3] = val[j*nb2+3];
1536  temp[4] = val[j*nb2+4];
1537  temp[5] = val[j*nb2+5];
1538  temp[6] = val[j*nb2+6];
1539  temp[7] = val[j*nb2+7];
1540  temp[8] = val[j*nb2+8];
1541 
1542  // some auxiliry variables
1543  REAL s22 = temp[4] - ((temp[1]*temp[3])/temp[0]);
1544  REAL s23 = temp[5] - ((temp[2]*temp[3])/temp[0]);
1545  REAL s32 = temp[7] - ((temp[1]*temp[6])/temp[0]);
1546 
1547  // form DL
1548  DL[i*nb2] = 1.0;
1549  DL[i*nb2+1] = 0.0;
1550  DL[i*nb2+2] = 0.0;
1551  DL[i*nb2+3] = -temp[3]/temp[0];
1552  DL[i*nb2+4] = 1.0;
1553  DL[i*nb2+5] = 0.0;
1554  DL[i*nb2+6] = -temp[6]/temp[0] + (temp[3]/temp[0])*(s32/s22);
1555  DL[i*nb2+7] = -s32/s22;
1556  DL[i*nb2+8] = 1.0;
1557 
1558  // form DU
1559  DU[i*nb2] = 1.0;
1560  DU[i*nb2+1] = -temp[1]/temp[0];
1561  DU[i*nb2+2] = -temp[2]/temp[0] + (temp[1]/temp[0])*(s23/s22);
1562  DU[i*nb2+3] = 0.0;
1563  DU[i*nb2+4] = 1.0;
1564  DU[i*nb2+5] = -s23/s22;
1565  DU[i*nb2+6] = 0.0;
1566  DU[i*nb2+7] = 0.0;
1567  DU[i*nb2+8] = 1.0;
1568 
1569  break;
1570 
1571  } // end of if (JA[j] == i)
1572 
1573  } // end of for (j=IA[i]; j<IA[i+1]; j++)
1574 
1575  } // end of for (i=0; i<ROW; i++)
1576 
1577  break;
1578 
1579  default:
1580  printf("### ERROR: Only works for nb = 2 or 3 now!");
1581  break;
1582 
1583  } // end of switch
1584 
1585  // compute B = DL*A*DU
1586  switch (nb) {
1587 
1588  case 2:
1589 
1590  for (i=0; i<ROW; i++){
1591 
1592  for (j=IA[i]; j<IA[i+1]; j++){
1593 
1594  k = JA[j];
1595 
1596  // left multiply DL
1597  fasp_blas_smat_mul_nc2(DL+i*nb2, val+j*nb2, temp);
1598 
1599  // right multiply DU
1600  fasp_blas_smat_mul_nc2(temp, DU+k*nb2, valb+j*nb2);
1601 
1602  // for diagonal block, set it to be diagonal matrix
1603  if (JA[j] == i){
1604 
1605  valb[j*nb2+1] = 0.0;
1606  valb[j*nb2+2] = 0.0;
1607 
1608  } // end if (JA[j] == i)
1609 
1610 
1611  } // end for (j=IA[i]; j<IA[i+1]; j++)
1612 
1613  } // end of for (i=0; i<ROW; i++)
1614 
1615  break;
1616 
1617  case 3:
1618 
1619  for (i=0; i<ROW; i++){
1620 
1621  for (j=IA[i]; j<IA[i+1]; j++){
1622 
1623  k = JA[j];
1624 
1625  // left multiply DL
1626  fasp_blas_smat_mul_nc3(DL+i*nb2, val+j*nb2, temp);
1627 
1628  // right multiply DU
1629  fasp_blas_smat_mul_nc3(temp, DU+k*nb2, valb+j*nb2);
1630 
1631  // for diagonal block, set it to be diagonal matrix
1632  if (JA[j] == i){
1633 
1634  valb[j*nb2+1] = 0.0;
1635  valb[j*nb2+2] = 0.0;
1636  valb[j*nb2+3] = 0.0;
1637  valb[j*nb2+5] = 0.0;
1638  valb[j*nb2+6] = 0.0;
1639  valb[j*nb2+7] = 0.0;
1640  if (ABS(valb[j*nb2+4]) < SMALLREAL) valb[j*nb2+4] = SMALLREAL;
1641  if (ABS(valb[j*nb2+8]) < SMALLREAL) valb[j*nb2+8] = SMALLREAL;
1642 
1643  } // end if (JA[j] == i)
1644 
1645  } // end for (j=IA[i]; j<IA[i+1]; j++)
1646 
1647  } // end of for (i=0; i<ROW; i++)
1648 
1649  break;
1650 
1651  default:
1652  printf("### ERROR: Only works for nb = 2 or 3 now!");
1653  break;
1654 
1655  }
1656 
1657  // return
1658  return B;
1659 
1660 }
1661 
1678  REAL *DL,
1679  REAL *DU)
1680 {
1681 
1682  // members of A
1683  INT ROW = A->ROW;
1684  INT ROW_plus_one = ROW+1;
1685  INT COL = A->COL;
1686  INT NNZ = A->NNZ;
1687  INT nb = A->nb;
1688  INT *IA = A->IA;
1689  INT *JA = A->JA;
1690  REAL *val = A->val;
1691 
1692  INT *IAb = NULL;
1693  INT *JAb = NULL;
1694  REAL *valb = NULL;
1695 
1696  INT nb2 = nb*nb;
1697  INT i,j,k;
1698 
1699  REAL sqt3, sqt4, sqt8;
1700 
1701  // Create a dBSRmat 'B'
1702  dBSRmat B = fasp_dbsr_create(ROW, COL, NNZ, nb, 0);
1703 
1704  REAL *temp = (REAL *)fasp_mem_calloc(nb*nb, sizeof(REAL));
1705 
1706  IAb = B.IA;
1707  JAb = B.JA;
1708  valb = B.val;
1709 
1710  fasp_iarray_cp(ROW_plus_one, IA, IAb);
1711  fasp_iarray_cp(NNZ, JA, JAb);
1712 
1713  // get DL and DU
1714  switch (nb) {
1715  case 2:
1716  for (i=0; i<ROW; i++){
1717  for (j=IA[i]; j<IA[i+1]; j++){
1718  if (JA[j] == i){
1719  REAL temp0 = val[j*nb2];
1720  REAL temp1 = val[j*nb2+1];
1721  REAL temp2 = val[j*nb2+2];
1722  REAL temp3 = val[j*nb2+3];
1723 
1724  if (ABS(temp3) < SMALLREAL) temp3 = SMALLREAL;
1725 
1726  sqt3 = sqrt(ABS(temp3));
1727 
1728  // form DL
1729  DL[i*nb2] = 1.0;
1730  DL[i*nb2+1] = 0.0;
1731  DL[i*nb2+2] = -temp2/temp0/sqt3;
1732  DL[i*nb2+3] = 1.0/sqt3;
1733 
1734  // form DU
1735  DU[i*nb2] = 1.0;
1736  DU[i*nb2+1] = -temp1/temp0/sqt3;
1737  DU[i*nb2+2] = 0.0;
1738  DU[i*nb2+3] = 1.0/sqt3;
1739  break;
1740 
1741  }
1742  }
1743  }
1744 
1745  break;
1746 
1747  case 3:
1748  for (i=0; i<ROW; i++){
1749  for (j=IA[i]; j<IA[i+1]; j++){
1750  if (JA[j] == i){
1751  REAL temp0 = val[j*nb2];
1752  REAL temp1 = val[j*nb2+1];
1753  REAL temp2 = val[j*nb2+2];
1754  REAL temp3 = val[j*nb2+3];
1755  REAL temp4 = val[j*nb2+4];
1756  REAL temp5 = val[j*nb2+5];
1757  REAL temp6 = val[j*nb2+6];
1758  REAL temp7 = val[j*nb2+7];
1759  REAL temp8 = val[j*nb2+8];
1760 
1761  if (ABS(temp4) < SMALLREAL) temp4 = SMALLREAL;
1762  if (ABS(temp8) < SMALLREAL) temp8 = SMALLREAL;
1763 
1764  sqt4 = sqrt(ABS(temp4));
1765  sqt8 = sqrt(ABS(temp8));
1766 
1767  // some auxiliary variables
1768  REAL s22 = temp4 - ((temp1*temp3)/temp0);
1769  REAL s23 = temp5 - ((temp2*temp3)/temp0);
1770  REAL s32 = temp7 - ((temp1*temp6)/temp0);
1771 
1772  // form DL
1773  DL[i*nb2] = 1.0;
1774  DL[i*nb2+1] = 0.0;
1775  DL[i*nb2+2] = 0.0;
1776  DL[i*nb2+3] = -temp3/temp0/sqt4;
1777  DL[i*nb2+4] = 1.0/sqt4;
1778  DL[i*nb2+5] = 0.0;
1779  DL[i*nb2+6] = (-temp6/temp0 + (temp3/temp0)*(s32/s22))/sqt8;
1780  DL[i*nb2+7] = -s32/s22/sqt8;
1781  DL[i*nb2+8] = 1.0/sqt8;
1782 
1783  // form DU
1784  DU[i*nb2] = 1.0;
1785  DU[i*nb2+1] = -temp1/temp0/sqt4;
1786  DU[i*nb2+2] = (-temp2/temp0 + (temp1/temp0)*(s23/s22))/sqt8;
1787  DU[i*nb2+3] = 0.0;
1788  DU[i*nb2+4] = 1.0/sqt4;
1789  DU[i*nb2+5] = -s23/s22/sqt8;
1790  DU[i*nb2+6] = 0.0;
1791  DU[i*nb2+7] = 0.0;
1792  DU[i*nb2+8] = 1.0/sqt8;
1793 
1794  break;
1795 
1796  }
1797  }
1798  }
1799 
1800  break;
1801 
1802  default:
1803  printf("### ERROR: Only works for nb = 2 or 3 now!");
1804  break;
1805 
1806  } // end of switch
1807 
1808  // compute B = DL*A*DU
1809  switch (nb) {
1810 
1811  case 2:
1812  for (i=0; i<ROW; i++){
1813  for (j=IA[i]; j<IA[i+1]; j++){
1814  k = JA[j];
1815  // left multiply DL
1816  fasp_blas_smat_mul_nc2(DL+i*nb2, val+j*nb2, temp);
1817  // right multiply DU
1818  fasp_blas_smat_mul_nc2(temp, DU+k*nb2, valb+j*nb2);
1819  // for diagonal block, set it to be diagonal matrix
1820  if (JA[j] == i){
1821  valb[j*nb2+1] = 0.0;
1822  valb[j*nb2+2] = 0.0;
1823  if (ABS(valb[j*nb2+3]) < SMALLREAL) valb[j*nb2+3] = SMALLREAL;
1824  }
1825  }
1826  }
1827 
1828  break;
1829 
1830  case 3:
1831  for (i=0; i<ROW; i++){
1832  for (j=IA[i]; j<IA[i+1]; j++){
1833  k = JA[j];
1834  // left multiply DL
1835  fasp_blas_smat_mul_nc3(DL+i*nb2, val+j*nb2, temp);
1836  // right multiply DU
1837  fasp_blas_smat_mul_nc3(temp, DU+k*nb2, valb+j*nb2);
1838  // for diagonal block, set it to be diagonal matrix
1839  if (JA[j] == i){
1840  valb[j*nb2+1] = 0.0;
1841  valb[j*nb2+2] = 0.0;
1842  valb[j*nb2+3] = 0.0;
1843  valb[j*nb2+5] = 0.0;
1844  valb[j*nb2+6] = 0.0;
1845  valb[j*nb2+7] = 0.0;
1846  if (ABS(valb[j*nb2+4]) < SMALLREAL) valb[j*nb2+4] = SMALLREAL;
1847  if (ABS(valb[j*nb2+8]) < SMALLREAL) valb[j*nb2+8] = SMALLREAL;
1848  }
1849  }
1850  }
1851  break;
1852 
1853  default:
1854  printf("### ERROR: Only works for nb = 2 or 3 now!");
1855  break;
1856  }
1857 
1858  // return
1859  return B;
1860 
1861 }
1862 
1863 /*---------------------------------*/
1864 /*-- End of File --*/
1865 /*---------------------------------*/
#define TRUE
Definition of logic type.
Definition: fasp_const.h:67
void fasp_smat_identity_nc3(REAL *a)
Set a 3*3 full matrix to be a identity.
Definition: smat.c:665
void fasp_dbsr_cp(dBSRmat *A, dBSRmat *B)
copy a dCSRmat to a new one B=A
Definition: sparse_bsr.c:181
#define ERROR_INPUT_PAR
Definition: fasp_const.h:31
dBSRmat fasp_dbsr_diagLU(dBSRmat *A, REAL *DL, REAL *DU)
Compute B := DL*A*DU. We decompose each diagonal block of A into LDU form and DL = diag(L^{-1}) and D...
Definition: sparse_bsr.c:1449
void fasp_blas_smat_mul_nc2(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 2* matrices a and b, stored in c.
Definition: blas_smat.c:233
#define REAL
Definition: fasp.h:67
dBSRmat fasp_dbsr_create(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner)
Create BSR sparse matrix data memory space.
Definition: sparse_bsr.c:36
void fasp_smat_identity_nc5(REAL *a)
Set a 5*5 full matrix to be a identity.
Definition: smat.c:682
INT fasp_blas_smat_inv(REAL *a, const INT n)
Compute the inverse matrix of a small full matrix a.
Definition: smat.c:554
INT nb
dimension of each sub-block
Definition: fasp_block.h:56
void fasp_blas_smat_mul_nc5(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 5*5 matrices a and b, stored in c.
Definition: blas_smat.c:299
Block sparse row storage matrix of REAL type.
Definition: fasp_block.h:44
REAL * val
actual vector entries
Definition: fasp.h:348
void * fasp_mem_calloc(LONGLONG size, INT type)
1M = 1024*1024
Definition: memory.c:60
Vector with n entries of REAL type.
Definition: fasp.h:342
void fasp_blas_smat_mul(REAL *a, REAL *b, REAL *c, const INT n)
Compute the matrix product of two small full matrices a and b, stored in c.
Definition: blas_smat.c:448
#define INT
Definition: fasp.h:64
#define FASP_SUCCESS
Definition of return status and error messages.
Definition: fasp_const.h:27
void fasp_blas_smat_mul_nc3(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 3*3 matrices a and b, stored in c.
Definition: blas_smat.c:262
void FASP_GET_START_END(INT procid, INT nprocs, INT n, INT *start, INT *end)
Assign Load to each thread.
Definition: threads.c:83
void fasp_mem_free(void *mem)
Free up previous allocated memory body.
Definition: memory.c:150
dBSRmat fasp_dbsr_diaginv4(dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: sparse_bsr.c:1120
REAL * val
Definition: fasp_block.h:67
INT NNZ
number of nonzero sub-blocks in matrix A, NNZ
Definition: fasp_block.h:53
void fasp_iarray_cp(const INT n, INT *x, INT *y)
Copy an array to the other y=x.
Definition: array.c:185
#define OPENMP_HOLDS
Definition: fasp_const.h:248
INT storage_manner
storage manner for each sub-block
Definition: fasp_block.h:59
Main header file for FASP.
dBSRmat fasp_dbsr_diaginv(dBSRmat *A)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: sparse_bsr.c:496
void fasp_blas_smat_inv_nc7(REAL *a)
Compute the inverse matrix of a 7*7 matrix a.
Definition: smat.c:389
dBSRmat fasp_dbsr_diaginv3(dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: sparse_bsr.c:762
void fasp_dbsr_alloc(const INT ROW, const INT COL, const INT NNZ, const INT nb, const INT storage_manner, dBSRmat *A)
Allocate memory space for BSR format sparse matrix.
Definition: sparse_bsr.c:87
void fasp_blas_smat_inv_nc5(REAL *a)
Compute the inverse matrix of a 5*5 full matrix A (in place)
Definition: smat.c:173
void fasp_blas_smat_inv_nc2(REAL *a)
Compute the inverse matrix of a 2*2 full matrix A (in place)
Definition: smat.c:25
void fasp_smat_identity(REAL *a, const INT n, const INT n2)
Set a n*n full matrix to be a identity.
Definition: smat.c:728
INT row
number of rows
Definition: fasp.h:345
INT COL
number of cols of sub-blocks in matrix A, N
Definition: fasp_block.h:50
void fasp_blas_smat_inv_nc3(REAL *a)
Compute the inverse matrix of a 3*3 full matrix A (in place)
Definition: smat.c:61
void fasp_blas_smat_mul_nc7(REAL *a, REAL *b, REAL *c)
Compute the matrix product of two 7*7 matrices a and b, stored in c.
Definition: blas_smat.c:358
#define ABS(a)
Definition: fasp.h:74
#define SHORT
FASP integer and floating point numbers.
Definition: fasp.h:63
#define MIN(a, b)
Definition: fasp.h:73
void fasp_smat_identity_nc2(REAL *a)
Set a 2*2 full matrix to be a identity.
Definition: smat.c:648
dvector fasp_dbsr_getdiaginv(dBSRmat *A)
Get D^{-1} of matrix A.
Definition: sparse_bsr.c:392
void fasp_dbsr_null(dBSRmat *A)
Initialize sparse matrix on structured grid.
Definition: sparse_bsr.c:158
INT ROW
number of rows of sub-blocks in matrix A, M
Definition: fasp_block.h:47
void fasp_dbsr_getdiag(INT n, dBSRmat *A, REAL *diag)
Abstract the diagonal blocks of a BSR matrix.
Definition: sparse_bsr.c:1411
INT * JA
Definition: fasp_block.h:74
SHORT fasp_dbsr_diagpref(dBSRmat *A)
Reorder the column and data arrays of a square BSR matrix, so that the first entry in each row is the...
Definition: sparse_bsr.c:292
INT fasp_dbsr_trans(dBSRmat *A, dBSRmat *AT)
Find A^T from given dBSRmat matrix A.
Definition: sparse_bsr.c:208
#define FALSE
Definition: fasp_const.h:68
dBSRmat fasp_dbsr_diagLU2(dBSRmat *A, REAL *DL, REAL *DU)
Compute B := DL*A*DU. We decompose each diagonal block of A into LDU form and DL = diag(L^{-1}) and D...
Definition: sparse_bsr.c:1677
void fasp_iarray_set(const INT n, INT *x, const INT val)
Set initial value for an array to be x=val.
Definition: array.c:107
INT * IA
integer array of row pointers, the size is ROW+1
Definition: fasp_block.h:70
void fasp_dbsr_free(dBSRmat *A)
Free memory space for BSR format sparse matrix.
Definition: sparse_bsr.c:133
dBSRmat fasp_dbsr_diaginv2(dBSRmat *A, REAL *diaginv)
Compute B := D^{-1}*A, where 'D' is the block diagonal part of A.
Definition: sparse_bsr.c:660
#define SMALLREAL
Definition: fasp_const.h:238
void fasp_smat_identity_nc7(REAL *a)
Set a 7*7 full matrix to be a identity.
Definition: smat.c:703