PEXSI
utility_impl.hpp
1 /*
2  Copyright (c) 2012 The Regents of the University of California,
3  through Lawrence Berkeley National Laboratory.
4 
5 Author: Lin Lin and Mathias Jacquelin
6 
7 This file is part of PEXSI. All rights reserved.
8 
9 Redistribution and use in source and binary forms, with or without
10 modification, are permitted provided that the following conditions are met:
11 
12 (1) Redistributions of source code must retain the above copyright notice, this
13 list of conditions and the following disclaimer.
14 (2) Redistributions in binary form must reproduce the above copyright notice,
15 this list of conditions and the following disclaimer in the documentation
16 and/or other materials provided with the distribution.
17 (3) Neither the name of the University of California, Lawrence Berkeley
18 National Laboratory, U.S. Dept. of Energy nor the names of its contributors may
19 be used to endorse or promote products derived from this software without
20 specific prior written permission.
21 
22 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24 WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25 DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
26 ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27 (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28 LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
29 ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33 You are under no obligation whatsoever to provide any bug fixes, patches, or
34 upgrades to the features, functionality or performance of the source code
35 ("Enhancements") to anyone; however, if you choose to make your Enhancements
36 available either publicly, or directly to Lawrence Berkeley National
37 Laboratory, without imposing a separate written license agreement for such
38 Enhancements, then you hereby grant the following license: a non-exclusive,
39 royalty-free perpetual license to install, use, modify, prepare derivative
40 works, incorporate into other computer software, distribute, and sublicense
41 such enhancements or derivative works thereof, in binary and source code form.
42  */
46 #ifndef _PEXSI_UTILITY_IMPL_HPP_
47 #define _PEXSI_UTILITY_IMPL_HPP_
48 
49 #include <numeric>
50 
51 //using namespace std;
52 using std::ifstream;
53 using std::ofstream;
54 using std::vector;
55 using std::cerr;
56 
57 
58 namespace PEXSI{
59 
60 // *********************************************************************
61 // Sparse Matrix
62 // *********************************************************************
63 
64 //---------------------------------------------------------
65 inline void ReadSparseMatrix ( const char* filename, SparseMatrix<Real>& spmat )
66 {
67 
68  // FIXME The format switch is hard-coded: only the binary branch below is taken.
69  // Binary format
70  if( 1 ){
71  std::istringstream iss;
72  SharedRead( filename, iss );
73  deserialize( spmat.size, iss, NO_MASK );
74  deserialize( spmat.nnz, iss, NO_MASK );
75  deserialize( spmat.colptr, iss, NO_MASK );
76  deserialize( spmat.rowind, iss, NO_MASK );
77  deserialize( spmat.nzval, iss, NO_MASK );
78  }
79 
80  // ASCII format (currently disabled)
81  if( 0 ) {
82  ifstream fin(filename);
83  fin >> spmat.size >> spmat.nnz;
84 
85  spmat.colptr.Resize( spmat.size+1 );
86  spmat.rowind.Resize( spmat.nnz );
87  spmat.nzval.Resize ( spmat.nnz );
88 
89  for( Int i = 0; i < spmat.size + 1; i++ ){
90  fin >> spmat.colptr(i);
91  }
92 
93  for( Int i = 0; i < spmat.nnz; i++ ){
94  fin >> spmat.rowind(i);
95  }
96 
97  for( Int i = 0; i < spmat.nnz; i++ ){
98  fin >> spmat.nzval(i);
99  }
100 
101  fin.close();
102  }
103 
104  return ;
105 } // ----- end of function ReadSparseMatrix -----
106 
107 //---------------------------------------------------------
108 inline void ReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
109 {
110  // Get the processor information within the current communicator
111  MPI_Barrier( comm );
112  Int mpirank; MPI_Comm_rank(comm, &mpirank);
113  Int mpisize; MPI_Comm_size(comm, &mpisize);
114  MPI_Status mpistat;
115  std::ifstream fin;
116 
117  // FIXME Maybe change to MPI_Comm_dup.
118  pspmat.comm = comm;
119 
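 // The binary file read below is a raw CSC dump with the layout
 //   size (Int) | nnz (Int) | size+1 (Int) | colptr[size+1] (Int)
 //   | nnz (Int) | rowind[nnz] (Int) | nnz (Int) | nzval[nnz] (Real)
 // i.e. each array is preceded by its length, which rank 0 checks while reading.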
120  // Read basic information
121  if( mpirank == 0 ){
122  fin.open(filename);
123  if( !fin.good() ){
124  ErrorHandling( "File cannot be opened!" );
125  }
126  fin.read((char*)&pspmat.size, sizeof(Int));
127  fin.read((char*)&pspmat.nnz, sizeof(Int));
128  }
129 
130  // FIXME A LongInt type may be needed later to hold the number of nonzeros
131 
132  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
133  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
134 
135  // Read colptr
136  IntNumVec colptr(pspmat.size+1);
137  if( mpirank == 0 ){
138  Int tmp;
139  fin.read((char*)&tmp, sizeof(Int));
140 
141  if( tmp != pspmat.size+1 ){
142  ErrorHandling( "colptr is not of the right size." );
143  }
144 
145  fin.read((char*)colptr.Data(), sizeof(Int)*tmp);
146 
147  }
148 
149  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
150 
151  // Compute the number of columns on each processor
152  IntNumVec numColLocalVec(mpisize);
153  Int numColLocal, numColFirst;
154  numColFirst = pspmat.size / mpisize;
155  SetValue( numColLocalVec, numColFirst );
156  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
157  numColLocal = numColLocalVec[mpirank];
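 // Columns are distributed in contiguous blocks: every rank owns
 // numColFirst = size/mpisize columns, except the last rank, which also takes
 // the remainder. For example (illustrative), size = 10 and mpisize = 4 gives
 // numColLocalVec = { 2, 2, 2, 4 }.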
158 
159  pspmat.colptrLocal.Resize( numColLocal + 1 );
160  for( Int i = 0; i < numColLocal + 1; i++ ){
161  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
162  }
163 
164  // Calculate nnz_loc on each processor
165  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
166 
167  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
168  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
169 
170  // Read and distribute the row indices
171  if( mpirank == 0 ){
172  Int tmp;
173  fin.read((char*)&tmp, sizeof(Int));
174 
175  if( tmp != pspmat.nnz ){
176  std::ostringstream msg;
177  msg
178  << "The number of nonzeros in row indices does not match." << std::endl
179  << "nnz = " << pspmat.nnz << std::endl
180  << "size of row indices = " << tmp << std::endl;
181  ErrorHandling( msg.str().c_str() );
182  }
183  IntNumVec buf;
184  Int numRead;
185  for( Int ip = 0; ip < mpisize; ip++ ){
186  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
187  colptr[ip*numColFirst];
188  buf.Resize(numRead);
189  fin.read( (char*)buf.Data(), numRead*sizeof(Int) );
190 
191  if( ip > 0 ){
192  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
193  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
194  }
195  else{
196  pspmat.rowindLocal = buf;
197  }
198  }
199  }
200  else{
201  Int numRead;
202  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
203  if( numRead != pspmat.nnzLocal ){
204  std::ostringstream msg;
205  msg << "The number of row indices received does not match nnzLocal." << std::endl
206  << "numRead = " << numRead << std::endl
207  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
208  ErrorHandling( msg.str().c_str() );
209  }
210 
211  pspmat.rowindLocal.Resize( numRead );
212  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
213  }
214 
215  // Read and distribute the nonzero values
216  if( mpirank == 0 ){
217  Int tmp;
218  fin.read((char*)&tmp, sizeof(Int));
219 
220  if( tmp != pspmat.nnz ){
221  std::ostringstream msg;
222  msg
223  << "The number of nonzeros in values does not match." << std::endl
224  << "nnz = " << pspmat.nnz << std::endl
225  << "size of values = " << tmp << std::endl;
226  ErrorHandling( msg.str().c_str() );
227  }
228  NumVec<Real> buf;
229  Int numRead;
230  for( Int ip = 0; ip < mpisize; ip++ ){
231  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
232  colptr[ip*numColFirst];
233  buf.Resize(numRead);
234  fin.read( (char*)buf.Data(), numRead*sizeof(Real) );
235 
236  if( ip > 0 ){
237  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
238  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
239  }
240  else{
241  pspmat.nzvalLocal = buf;
242  }
243  }
244  }
245  else{
246  Int numRead;
247  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
248  if( numRead != pspmat.nnzLocal ){
249  std::ostringstream msg;
250  msg << "The number of values received does not match nnzLocal." << std::endl
251  << "numRead = " << numRead << std::endl
252  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
253  ErrorHandling( msg.str().c_str() );
254  }
255 
256  pspmat.nzvalLocal.Resize( numRead );
257  MPI_Recv( pspmat.nzvalLocal.Data(), numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
258  }
259 
260  // Close the file
261  if( mpirank == 0 ){
262  fin.close();
263  }
264 
265 
266 
267  MPI_Barrier( comm );
268 
269 
270  return ;
271 } // ----- end of function ReadDistSparseMatrix -----
272 
273 inline void ParaWriteDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
274 {
275  // Get the processor information within the current communicator
276  MPI_Barrier( comm );
277  Int mpirank; MPI_Comm_rank(comm, &mpirank);
278  Int mpisize; MPI_Comm_size(comm, &mpisize);
279  MPI_Status mpistat;
280  Int err = 0;
281 
282 
283 
284  int filemode = MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN;
285 
286  MPI_File fout;
287  MPI_Status status;
288 
289 
290 
291  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fout);
292 
293  if (err != MPI_SUCCESS) {
294  ErrorHandling( "File cannot be opened!" );
295  }
296 
297  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
298  // Write header
299  if( mpirank == 0 ){
300  err = MPI_File_write_at(fout, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
301  err = MPI_File_write_at(fout, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
302  }
303 
304 
305  // Compute the number of columns on each processor
306  Int numColLocal = pspmat.colptrLocal.m()-1;
307  Int numColFirst = pspmat.size / mpisize;
308  IntNumVec colptrChunk(numColLocal+1);
309 
310  Int prev_nz = 0;
311  MPI_Exscan(&pspmat.nnzLocal, &prev_nz, 1, MPI_INT, MPI_SUM, comm);
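 // prev_nz is the exclusive prefix sum of nnzLocal over the ranks, i.e. the
 // number of nonzeros owned by all lower ranks; it remains 0 on rank 0.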
312 
313  for( Int i = 0; i < numColLocal + 1; i++ ){
314  colptrChunk[i] = pspmat.colptrLocal[i] + prev_nz;
315  }
316 
317 
318  MPI_Datatype memtype, filetype;
319  MPI_Aint disps[6];
320  int blklens[6];
321  MPI_Datatype types[6] = {MPI_INT,MPI_INT, MPI_INT,MPI_INT, MPI_INT,MPI_DOUBLE};
322 
323  /* set block lengths (same for both types) */
324  blklens[0] = (mpirank==0)?1:0;
325  blklens[1] = numColLocal+1;
326  blklens[2] = (mpirank==0)?1:0;
327  blklens[3] = pspmat.nnzLocal;
328  blklens[4] = (mpirank==0)?1:0;
329  blklens[5] = pspmat.nnzLocal;
330 
331 
332 
333 
334  //Calculate offsets
335  MPI_Offset myColPtrOffset, myRowIdxOffset, myNzValOffset;
336  myColPtrOffset = 3*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
337  myRowIdxOffset = 3*sizeof(int) + (pspmat.size +1 + prev_nz)*sizeof(Int);
338  myNzValOffset = 4*sizeof(int) + (pspmat.size +1 + pspmat.nnz)*sizeof(Int)+ prev_nz*sizeof(Real);
339  disps[0] = 2*sizeof(int);
340  disps[1] = myColPtrOffset;
341  disps[2] = myRowIdxOffset;
342  disps[3] = sizeof(int)+myRowIdxOffset;
343  disps[4] = myNzValOffset;
344  disps[5] = sizeof(int)+myNzValOffset;
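 // The displacements above place this rank's pieces inside the global file
 // layout
 //   size | nnz | size+1 | colptr | nnz | rowind | nnz | nzval
 // Only rank 0 writes the three array-length fields, which is why blklens[0],
 // blklens[2] and blklens[4] are nonzero on rank 0 only.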
345 
346 
347 
348 #if ( _DEBUGlevel_ >= 1 )
349  char msg[200];
350  char * tmp = msg;
351  tmp += sprintf(tmp,"P%d ",mpirank);
352  for(int i = 0; i<6; ++i){
353  if(i==5)
354  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(double));
355  else
356  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(int));
357  }
358  tmp += sprintf(tmp,"\n");
359  printf("%s",msg);
360 #endif
361 
362 
363 
364 
365  MPI_Type_create_struct(6, blklens, disps, types, &filetype);
366  MPI_Type_commit(&filetype);
367 
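 // NOTE: MPI_Address, MPI_Type_struct and MPI_Type_hindexed used in this file
 // are MPI-1 routines that were removed in MPI-3.0; recent MPI implementations
 // require MPI_Get_address, MPI_Type_create_struct and
 // MPI_Type_create_hindexed instead.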
368  /* create memory type */
369  Int np1 = pspmat.size+1;
370  MPI_Address( (void *)&np1, &disps[0]);
371  MPI_Address(colptrChunk.Data(), &disps[1]);
372  MPI_Address( (void *)&pspmat.nnz, &disps[2]);
373  MPI_Address((void *)pspmat.rowindLocal.Data(), &disps[3]);
374  MPI_Address( (void *)&pspmat.nnz, &disps[4]);
375  MPI_Address((void *)pspmat.nzvalLocal.Data(), &disps[5]);
376 
377  MPI_Type_create_struct(6, blklens, disps, types, &memtype);
378  MPI_Type_commit(&memtype);
379 
380 
381 
382  /* set file view */
383  err = MPI_File_set_view(fout, 0, MPI_BYTE, filetype, "native",MPI_INFO_NULL);
384 
385  /* everyone writes their own row offsets, columns, and
386  * data with one big noncontiguous write (in memory and
387  * file)
388  */
389  err = MPI_File_write_all(fout, MPI_BOTTOM, 1, memtype, &status);
390 
391  MPI_Type_free(&filetype);
392  MPI_Type_free(&memtype);
393 
394 
395 
396 
397 
398  MPI_Barrier( comm );
399 
400  MPI_File_close(&fout);
401 
402  return ;
403 } // ----- end of function ParaWriteDistSparseMatrix -----
404 
405 inline void ParaReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
406 {
407  // Get the processor information within the current communicator
408  MPI_Barrier( comm );
409  Int mpirank; MPI_Comm_rank(comm, &mpirank);
410  Int mpisize; MPI_Comm_size(comm, &mpisize);
411  MPI_Status mpistat;
412  MPI_Datatype type;
413  int lens[3];
414  MPI_Aint disps[3];
415  MPI_Datatype types[3];
416  Int err = 0;
417 
418  int filemode = MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN;
419 
420  MPI_File fin;
421  MPI_Status status;
422 
423  // FIXME Maybe change to MPI_Comm_dup.
424  pspmat.comm = comm;
425 
426  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fin);
427 
428  if (err != MPI_SUCCESS) {
429  ErrorHandling( "File cannot be opened!" );
430  }
431 
432  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
433  // Read header
434  if( mpirank == 0 ){
435  err = MPI_File_read_at(fin, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
436  err = MPI_File_read_at(fin, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
437  }
438 
439 
440  /* define a struct that describes all our data */
441  lens[0] = 1;
442  lens[1] = 1;
443  MPI_Address(&pspmat.size, &disps[0]);
444  MPI_Address(&pspmat.nnz, &disps[1]);
445  types[0] = MPI_INT;
446  types[1] = MPI_INT;
447  MPI_Type_struct(2, lens, disps, types, &type);
448  MPI_Type_commit(&type);
449 
450 
451  /* broadcast the header data to everyone */
452  MPI_Bcast(MPI_BOTTOM, 1, type, 0, comm);
453 
454  MPI_Type_free(&type);
455 
456  // Compute the number of columns on each processor
457  IntNumVec numColLocalVec(mpisize);
458  Int numColLocal, numColFirst;
459  numColFirst = pspmat.size / mpisize;
460  SetValue( numColLocalVec, numColFirst );
461  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
462  numColLocal = numColLocalVec[mpirank];
463  pspmat.colptrLocal.Resize( numColLocal + 1 );
464 
465 
466 
467  MPI_Offset myColPtrOffset = (2 + ((mpirank==0)?0:1) )*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
468 
469  Int np1 = 0;
470  lens[0] = (mpirank==0)?1:0;
471  lens[1] = numColLocal + 1;
472 
473  MPI_Address(&np1, &disps[0]);
474  MPI_Address(pspmat.colptrLocal.Data(), &disps[1]);
475 
476  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
477  MPI_Type_commit(&type);
478 
479  err= MPI_File_read_at_all(fin, myColPtrOffset, MPI_BOTTOM, 1, type, &status);
480 
481  if (err != MPI_SUCCESS) {
482  ErrorHandling( "error reading colptr" );
483  }
484  MPI_Type_free(&type);
485 
486  // Calculate nnz_loc on each processor
487  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
488 
489 
490  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
491  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
492 
493  //read rowIdx
494  MPI_Offset myRowIdxOffset = (3 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + (pspmat.colptrLocal[0]-1))*sizeof(int);
495 
496  lens[0] = (mpirank==0)?1:0;
497  lens[1] = pspmat.nnzLocal;
498 
499  MPI_Address(&np1, &disps[0]);
500  MPI_Address(pspmat.rowindLocal.Data(), &disps[1]);
501 
502  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
503  MPI_Type_commit(&type);
504 
505  err= MPI_File_read_at_all(fin, myRowIdxOffset, MPI_BOTTOM, 1, type,&status);
506 
507  if (err != MPI_SUCCESS) {
508  ErrorHandling( "error reading rowind" );
509  }
510  MPI_Type_free(&type);
511 
512 
513  //read nzval
514 // MPI_Offset myNzValOffset = (4 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(Int) + (pspmat.colptrLocal[0]-1)*sizeof(double);
515 //
516 // lens[0] = (mpirank==0)?1:0;
517 // lens[1] = pspmat.nnzLocal;
518 //
519 // MPI_Address(&np1, &disps[0]);
520 // MPI_Address(pspmat.nzvalLocal.Data(), &disps[1]);
521 //
522 // types[0] = MPI_INT;
523 // types[1] = MPI_DOUBLE;
524 //
525 // MPI_Type_create_struct(2, lens, disps, types, &type);
526 // MPI_Type_commit(&type);
527 //
528 // err = MPI_File_read_at_all(fin, myNzValOffset, MPI_BOTTOM, 1, type,&status);
529 // MPI_Type_free(&type);
530 
531  if( mpirank == 0 ){
532  MPI_Offset myNzValOffset = (4 )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(double);
533  err = MPI_File_read_at(fin, myNzValOffset,(char*)&np1, 1, MPI_INT, &status);
534  }
535 
536  MPI_Offset myNzValOffset = (5 )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(double);
537  err = MPI_File_read_at_all(fin, myNzValOffset, pspmat.nzvalLocal.Data(), pspmat.nnzLocal, MPI_DOUBLE,&status);
538 
539 
540 
541  if (err != MPI_SUCCESS) {
542  ErrorHandling( "error reading nzval" );
543  }
544 
545 
546 
547  //convert to local references
548  for( Int i = 1; i < numColLocal + 1; i++ ){
549  pspmat.colptrLocal[i] = pspmat.colptrLocal[i] - pspmat.colptrLocal[0] + 1;
550  }
551  pspmat.colptrLocal[0]=1;
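 // e.g. (illustrative) a local slice of the global colptr { 5, 9, 12 }
 // becomes the 1-based local colptr { 1, 5, 8 }.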
552 
553  MPI_Barrier( comm );
554 
555  MPI_File_close(&fin);
556 
557  return ;
558 } // ----- end of function ParaReadDistSparseMatrix -----
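// Illustrative usage sketch (assumptions: an existing MPI communicator "comm"
// and a file "H.csc" previously written by ParaWriteDistSparseMatrix):
//
//   DistSparseMatrix<Real> HMat;
//   ParaReadDistSparseMatrix( "H.csc", HMat, comm );
//   // Each rank now owns a contiguous block of columns of HMat in 1-based
//   // local CSC form (colptrLocal, rowindLocal, nzvalLocal).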
559 
560 inline void ReadDistSparseMatrixFormatted ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
561 {
562  // Get the processor information within the current communicator
563  MPI_Barrier( comm );
564  Int mpirank; MPI_Comm_rank(comm, &mpirank);
565  Int mpisize; MPI_Comm_size(comm, &mpisize);
566  MPI_Status mpistat;
567  std::ifstream fin;
568 
569  // FIXME Maybe change to MPI_Comm_dup.
570  pspmat.comm = comm;
571 
572  // FIXME A LongInt type may be needed later to hold the number of nonzeros
573 
574  // Read basic information
575  if( mpirank == 0 ){
576  fin.open(filename);
577  if( !fin.good() ){
578  ErrorHandling( "File cannot be opened!" );
579  }
580  Int dummy;
581 
582  fin >> pspmat.size >> dummy;
583  fin >> pspmat.nnz >> dummy;
584  // FIXME This is temporary and only applies to a 4x4 test matrix.
585  // fin >> dummy;
586  }
587 
588  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
589  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
590 
591  // Read colptr
592 
593  IntNumVec colptr(pspmat.size+1);
594  if( mpirank == 0 ){
595  Int* ptr = colptr.Data();
596  for( Int i = 0; i < pspmat.size+1; i++ )
597  fin >> *(ptr++);
598  }
599 
600  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
601 
602  // Compute the number of columns on each processor
603  IntNumVec numColLocalVec(mpisize);
604  Int numColLocal, numColFirst;
605  numColFirst = pspmat.size / mpisize;
606  SetValue( numColLocalVec, numColFirst );
607  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
608  numColLocal = numColLocalVec[mpirank];
609 
610  pspmat.colptrLocal.Resize( numColLocal + 1 );
611  for( Int i = 0; i < numColLocal + 1; i++ ){
612  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
613  }
614 
615  // Calculate nnz_loc on each processor
616  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
617 
618  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
619  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
620 
621  // Read and distribute the row indices
622  if( mpirank == 0 ){
623  Int tmp;
624  IntNumVec buf;
625  Int numRead;
626  for( Int ip = 0; ip < mpisize; ip++ ){
627  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
628  colptr[ip*numColFirst];
629  buf.Resize(numRead);
630  Int *ptr = buf.Data();
631  for( Int i = 0; i < numRead; i++ ){
632  fin >> *(ptr++);
633  }
634  if( ip > 0 ){
635  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
636  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
637  }
638  else{
639  pspmat.rowindLocal = buf;
640  }
641  }
642  }
643  else{
644  Int numRead;
645  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
646  if( numRead != pspmat.nnzLocal ){
647  std::ostringstream msg;
648  msg << "The number of row indices received does not match nnzLocal." << std::endl
649  << "numRead = " << numRead << std::endl
650  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
651  ErrorHandling( msg.str().c_str() );
652  }
653 
654  pspmat.rowindLocal.Resize( numRead );
655  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
656  }
657 
658  // std::cout << "Proc " << mpirank << " outputs rowindLocal.size() = "
659  // << pspmat.rowindLocal.m() << endl;
660 
661 
662  // Read and distribute the nonzero values
663  if( mpirank == 0 ){
664  Int tmp;
665  NumVec<Real> buf;
666  Int numRead;
667  for( Int ip = 0; ip < mpisize; ip++ ){
668  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
669  colptr[ip*numColFirst];
670  buf.Resize(numRead);
671  Real *ptr = buf.Data();
672  for( Int i = 0; i < numRead; i++ ){
673  fin >> *(ptr++);
674  }
675  if( ip > 0 ){
676  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
677  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
678  }
679  else{
680  pspmat.nzvalLocal = buf;
681  }
682  }
683  }
684  else{
685  Int numRead;
686  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
687  if( numRead != pspmat.nnzLocal ){
688  std::ostringstream msg;
689  msg << "The number of values received does not match nnzLocal." << std::endl
690  << "numRead = " << numRead << std::endl
691  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
692  ErrorHandling( msg.str().c_str() );
693  }
694 
695  pspmat.nzvalLocal.Resize( numRead );
696  MPI_Recv( pspmat.nzvalLocal.Data(), numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
697  }
698 
699  // Close the file
700  if( mpirank == 0 ){
701  fin.close();
702  }
703 
704 
705 
706  MPI_Barrier( comm );
707 
708 
709  return ;
710 } // ----- end of function ReadDistSparseMatrixFormatted -----
711 
712 
713 inline void ParaReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
714 {
715  // Get the processor information within the current communicator
716  MPI_Barrier( comm );
717  Int mpirank; MPI_Comm_rank(comm, &mpirank);
718  Int mpisize; MPI_Comm_size(comm, &mpisize);
719  MPI_Status mpistat;
720  MPI_Datatype type;
721  int lens[3];
722  MPI_Aint disps[3];
723  MPI_Datatype types[3];
724  Int err = 0;
725 
726  int filemode = MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN;
727 
728  MPI_File fin;
729  MPI_Status status;
730 
731  // FIXME Maybe change to MPI_Comm_dup.
732  pspmat.comm = comm;
733 
734  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fin);
735 
736  if (err != MPI_SUCCESS) {
737  ErrorHandling( "File cannot be opened!" );
738  }
739 
740  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
741  // Read header
742  if( mpirank == 0 ){
743  err = MPI_File_read_at(fin, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
744  err = MPI_File_read_at(fin, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
745  }
746 
747 
748  /* define a struct that describes all our data */
749  lens[0] = 1;
750  lens[1] = 1;
751  MPI_Address(&pspmat.size, &disps[0]);
752  MPI_Address(&pspmat.nnz, &disps[1]);
753  types[0] = MPI_INT;
754  types[1] = MPI_INT;
755  MPI_Type_struct(2, lens, disps, types, &type);
756  MPI_Type_commit(&type);
757 
758 
759  /* broadcast the header data to everyone */
760  MPI_Bcast(MPI_BOTTOM, 1, type, 0, comm);
761 
762  MPI_Type_free(&type);
763 
764  // Compute the number of columns on each processor
765  IntNumVec numColLocalVec(mpisize);
766  Int numColLocal, numColFirst;
767  numColFirst = pspmat.size / mpisize;
768  SetValue( numColLocalVec, numColFirst );
769  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
770  numColLocal = numColLocalVec[mpirank];
771  pspmat.colptrLocal.Resize( numColLocal + 1 );
772 
773 
774 
775  MPI_Offset myColPtrOffset = (2 + ((mpirank==0)?0:1) )*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
776 
777  Int np1 = 0;
778  lens[0] = (mpirank==0)?1:0;
779  lens[1] = numColLocal + 1;
780 
781  MPI_Address(&np1, &disps[0]);
782  MPI_Address(pspmat.colptrLocal.Data(), &disps[1]);
783 
784  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
785  MPI_Type_commit(&type);
786 
787  err= MPI_File_read_at_all(fin, myColPtrOffset, MPI_BOTTOM, 1, type, &status);
788 
789  if (err != MPI_SUCCESS) {
790  ErrorHandling( "error reading colptr" );
791  }
792  MPI_Type_free(&type);
793 
794  // Calculate nnz_loc on each processor
795  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
796 
797 
798  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
799  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
800 
801  //read rowIdx
802  MPI_Offset myRowIdxOffset = (3 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.colptrLocal[0]-1)*sizeof(Int);
803 
804  lens[0] = (mpirank==0)?1:0;
805  lens[1] = pspmat.nnzLocal;
806 
807  MPI_Address(&np1, &disps[0]);
808  MPI_Address(pspmat.rowindLocal.Data(), &disps[1]);
809 
811  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
812  MPI_Type_commit(&type);
813 
814  err= MPI_File_read_at_all(fin, myRowIdxOffset, MPI_BOTTOM, 1, type,&status);
815 
816  if (err != MPI_SUCCESS) {
817  ErrorHandling( "error reading rowind" );
818  }
819  MPI_Type_free(&type);
820 
821 
822  //read nzval
823 // MPI_Offset myNzValOffset = (4 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(Complex);
824 
825 // lens[0] = ((mpirank==0)?1:0)*sizeof(Int);
826 // lens[1] = pspmat.nnzLocal*sizeof(Complex);
827 //
828 // MPI_Address(&np1, &disps[0]);
829 // MPI_Address(pspmat.nzvalLocal.Data(), &disps[1]);
830 //
831 // types[0] = MPI_INT;
832 // types[1] = MPI_DOUBLE_COMPLEX;
833 // MPI_Type_create_struct(2, lens, disps, types, &type);
834 // MPI_Type_commit(&type);
835 //
836 // err = MPI_File_read_at_all(fin, myNzValOffset, MPI_BOTTOM, 1, type,&status);
837 //
838 // MPI_Type_free(&type);
839 
840  if( mpirank == 0 ){
841  MPI_Offset myNzValOffset = (4 )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(Complex);
842  err = MPI_File_read_at(fin, myNzValOffset,(char*)&np1, 1, MPI_INT, &status);
843  }
844 
845  MPI_Offset myNzValOffset = (5 )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(Complex);
846  err = MPI_File_read_at_all(fin, myNzValOffset, pspmat.nzvalLocal.Data(), pspmat.nnzLocal, MPI_DOUBLE_COMPLEX,&status);
847 
848 
849 
850  if (err != MPI_SUCCESS) {
851  ErrorHandling( "error reading nzval" );
852  }
853 
854 
855 
856  //convert to local references
857  for( Int i = 1; i < numColLocal + 1; i++ ){
858  pspmat.colptrLocal[i] = pspmat.colptrLocal[i] - pspmat.colptrLocal[0] + 1;
859  }
860  pspmat.colptrLocal[0]=1;
861 
862  MPI_Barrier( comm );
863 
864  MPI_File_close(&fin);
865 
866  return ;
867 } // ----- end of function ParaReadDistSparseMatrix -----
868 
869 inline void ParaWriteDistSparseMatrix ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
870 {
871  // Get the processor information within the current communicator
872  MPI_Barrier( comm );
873  Int mpirank; MPI_Comm_rank(comm, &mpirank);
874  Int mpisize; MPI_Comm_size(comm, &mpisize);
875  MPI_Status mpistat;
876  Int err = 0;
877 
878 
879 
880  int filemode = MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN;
881 
882  MPI_File fout;
883  MPI_Status status;
884 
885 
886 
887  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fout);
888 
889  if (err != MPI_SUCCESS) {
890  ErrorHandling( "File cannot be opened!" );
891  }
892 
893  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
894  // Write header
895  if( mpirank == 0 ){
896  err = MPI_File_write_at(fout, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
897  err = MPI_File_write_at(fout, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
898  }
899 
900 
901  // Compute the number of columns on each processor
902  Int numColLocal = pspmat.colptrLocal.m()-1;
903  Int numColFirst = pspmat.size / mpisize;
904  IntNumVec colptrChunk(numColLocal+1);
905 
906  Int prev_nz = 0;
907  MPI_Exscan(&pspmat.nnzLocal, &prev_nz, 1, MPI_INT, MPI_SUM, comm);
908 
909  for( Int i = 0; i < numColLocal + 1; i++ ){
910  colptrChunk[i] = pspmat.colptrLocal[i] + prev_nz;
911  }
912 
913 
914  MPI_Datatype memtype, filetype;
915  MPI_Aint disps[6];
916  int blklens[6];
917  MPI_Datatype types[6] = {MPI_INT,MPI_INT, MPI_INT,MPI_INT, MPI_INT,MPI_DOUBLE_COMPLEX};
918 
919  /* set block lengths (same for both types) */
920  blklens[0] = (mpirank==0)?1:0;
921  blklens[1] = numColLocal+1;
922  blklens[2] = (mpirank==0)?1:0;
923  blklens[3] = pspmat.nnzLocal;
924  blklens[4] = (mpirank==0)?1:0;
925  blklens[5] = pspmat.nnzLocal;
926 
927 
928 
929 
930  //Calculate offsets
931  MPI_Offset myColPtrOffset, myRowIdxOffset, myNzValOffset;
932  myColPtrOffset = 3*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
933  myRowIdxOffset = 3*sizeof(int) + (pspmat.size +1 + prev_nz)*sizeof(Int);
934  myNzValOffset = 4*sizeof(int) + (pspmat.size +1 + pspmat.nnz)*sizeof(Int)+ prev_nz*sizeof(Complex);
935  disps[0] = 2*sizeof(int);
936  disps[1] = myColPtrOffset;
937  disps[2] = myRowIdxOffset;
938  disps[3] = sizeof(int)+myRowIdxOffset;
939  disps[4] = myNzValOffset;
940  disps[5] = sizeof(int)+myNzValOffset;
941 
942 
943 
944 #if ( _DEBUGlevel_ >= 1 )
945  char msg[200];
946  char * tmp = msg;
947  tmp += sprintf(tmp,"P%d ",mpirank);
948  for(int i = 0; i<6; ++i){
949  if(i==5)
950  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(Complex));
951  else
952  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(int));
953  }
954  tmp += sprintf(tmp,"\n");
955  printf("%s",msg);
956 #endif
957 
958 
959 
960 
961  MPI_Type_create_struct(6, blklens, disps, types, &filetype);
962  MPI_Type_commit(&filetype);
963 
964  /* create memory type */
965  Int np1 = pspmat.size+1;
966  MPI_Address( (void *)&np1, &disps[0]);
967  MPI_Address(colptrChunk.Data(), &disps[1]);
968  MPI_Address( (void *)&pspmat.nnz, &disps[2]);
969  MPI_Address((void *)pspmat.rowindLocal.Data(), &disps[3]);
970  MPI_Address( (void *)&pspmat.nnz, &disps[4]);
971  MPI_Address((void *)pspmat.nzvalLocal.Data(), &disps[5]);
972 
973  MPI_Type_create_struct(6, blklens, disps, types, &memtype);
974  MPI_Type_commit(&memtype);
975 
976 
977 
978  /* set file view */
979  err = MPI_File_set_view(fout, 0, MPI_BYTE, filetype, "native",MPI_INFO_NULL);
980 
981  /* everyone writes their own row offsets, columns, and
982  * data with one big noncontiguous write (in memory and
983  * file)
984  */
985  err = MPI_File_write_all(fout, MPI_BOTTOM, 1, memtype, &status);
986 
987  MPI_Type_free(&filetype);
988  MPI_Type_free(&memtype);
989 
990 
991 
992 
993 
994  MPI_Barrier( comm );
995 
996  MPI_File_close(&fout);
997 
998  return ;
999 } // ----- end of function ParaWriteDistSparseMatrix -----
1000 
1001 
1002 
1003 inline void ReadDistSparseMatrixFormatted ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
1004 {
1005  // Get the processor information within the current communicator
1006  MPI_Barrier( comm );
1007  Int mpirank; MPI_Comm_rank(comm, &mpirank);
1008  Int mpisize; MPI_Comm_size(comm, &mpisize);
1009  MPI_Status mpistat;
1010  std::ifstream fin;
1011 
1012  // FIXME Maybe change to MPI_Comm_dup.
1013  pspmat.comm = comm;
1014 
1015  // FIXME A LongInt type may be needed later to hold the number of nonzeros
1016 
1017  // Read basic information
1018  if( mpirank == 0 ){
1019  fin.open(filename);
1020  if( !fin.good() ){
1021  ErrorHandling( "File cannot be opened!" );
1022  }
1023  Int dummy;
1024 
1025  fin >> pspmat.size >> dummy;
1026  fin >> pspmat.nnz >> dummy;
1027  // FIXME This is temporary and only applies to a 4x4 test matrix.
1028  // fin >> dummy;
1029  }
1030 
1031  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
1032  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
1033 
1034  // Read colptr
1035 
1036  IntNumVec colptr(pspmat.size+1);
1037  if( mpirank == 0 ){
1038  Int* ptr = colptr.Data();
1039  for( Int i = 0; i < pspmat.size+1; i++ )
1040  fin >> *(ptr++);
1041  }
1042 
1043  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
1044 
1045  // Compute the number of columns on each processor
1046  IntNumVec numColLocalVec(mpisize);
1047  Int numColLocal, numColFirst;
1048  numColFirst = pspmat.size / mpisize;
1049  SetValue( numColLocalVec, numColFirst );
1050  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
1051  numColLocal = numColLocalVec[mpirank];
1052 
1053  pspmat.colptrLocal.Resize( numColLocal + 1 );
1054  for( Int i = 0; i < numColLocal + 1; i++ ){
1055  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
1056  }
1057 
1058  // Calculate nnz_loc on each processor
1059  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
1060 
1061  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
1062  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
1063 
1064  // Read and distribute the row indices
1065  if( mpirank == 0 ){
1066  Int tmp;
1067  IntNumVec buf;
1068  Int numRead;
1069  for( Int ip = 0; ip < mpisize; ip++ ){
1070  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
1071  colptr[ip*numColFirst];
1072 
1073 
1074  buf.Resize(numRead);
1075  Int *ptr = buf.Data();
1076  for( Int i = 0; i < numRead; i++ ){
1077  fin >> *(ptr++);
1078  }
1079  if( ip > 0 ){
1080  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
1081  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
1082  }
1083  else{
1084  pspmat.rowindLocal = buf;
1085  }
1086  }
1087  }
1088  else{
1089  Int numRead;
1090  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
1091  if( numRead != pspmat.nnzLocal ){
1092  std::ostringstream msg;
1093  msg << "The number of row indices received does not match nnzLocal." << std::endl
1094  << "numRead = " << numRead << std::endl
1095  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
1096  ErrorHandling( msg.str().c_str() );
1097  }
1098 
1099  pspmat.rowindLocal.Resize( numRead );
1100  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
1101  }
1102 
1103  // std::cout << "Proc " << mpirank << " outputs rowindLocal.size() = "
1104  // << pspmat.rowindLocal.m() << endl;
1105 
1106 
1107  // Read and distribute the nonzero values
1108  if( mpirank == 0 ){
1109  Int tmp;
1110  NumVec<Real> buf;
1111  Int numRead;
1112  for( Int ip = 0; ip < mpisize; ip++ ){
1113  numRead = 2* (colptr[ip*numColFirst + numColLocalVec[ip]] -
1114  colptr[ip*numColFirst]);
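 // Each complex value is stored as two consecutive reals (real part,
 // imaginary part), hence the factor of 2 in numRead.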
1115  Real *ptr;
1116  if( ip > 0 ){
1117  buf.Resize(numRead);
1118  ptr = buf.Data();
1119  }
1120  else{
1121  ptr = reinterpret_cast<Real*>(pspmat.nzvalLocal.Data());
1122  }
1123 
1124  // read data into either buf or pspmat.nzvalLocal
1125  for( Int i = 0; i < numRead; i++ ){
1126  fin >> *(ptr++);
1127  }
1128 
1129  if( ip > 0 ){
1130  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
1131  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
1132  }
1133  // else{
1134  // std::copy( buf.Data(), buf.Data() + numRead, pspmat.nzvalLocal.Data() );
1135  // memcpy( (void*)( pspmat.nzvalLocal.Data() ),
1136  // (void*)buf.Data(), sizeof(Real)*numRead );
1137  // }
1138  }
1139  }
1140  else{
1141  Int numRead;
1142  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
1143  if( numRead != 2*pspmat.nnzLocal ){
1144  std::ostringstream msg;
1145  msg << "The number of values received does not match 2*nnzLocal." << std::endl
1146  << "numRead = " << numRead << std::endl
1147  << "2*nnzLocal = " << 2*pspmat.nnzLocal << std::endl;
1148  ErrorHandling( msg.str().c_str() );
1149  }
1150 
1151  pspmat.nzvalLocal.Resize( numRead/2 );
1152  MPI_Recv( reinterpret_cast<Real*>(pspmat.nzvalLocal.Data()),
1153  numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
1154  }
1155 
1156  // Close the file
1157  if( mpirank == 0 ){
1158  fin.close();
1159  }
1160 
1161 
1162 
1163  MPI_Barrier( comm );
1164 
1165 
1166  return ;
1167 } // ----- end of function ReadDistSparseMatrixFormatted -----
1168 
1169 
1170 template<typename T> inline void GetDiagonal ( const DistSparseMatrix<T>& A, NumVec<T>& diag )
1171 {
1172  Int mpirank, mpisize;
1173  MPI_Comm_rank( A.comm, &mpirank );
1174  MPI_Comm_size( A.comm, &mpisize );
1175 
1176  NumVec<T> diagLocal( A.size );
1177  SetValue( diagLocal, ZERO<T>() );
1178  diag.Resize( A.size );
1179  SetValue( diag, ZERO<T>() );
1180 
1181  Int numColFirst = A.size / mpisize;
1182  Int firstCol = mpirank * numColFirst;
1183  Int numColLocal = A.colptrLocal.m() - 1;
1184 
1185 #if ( _DEBUGlevel_ >= 1 )
1186  statusOFS << "numColFirst = " << numColFirst << std::endl;
1187  statusOFS << "A.nzvalLocal.size = " << A.nzvalLocal.m() << std::endl;
1188  statusOFS << "A.nnzLocal = " << A.nnzLocal << std::endl;
1189 #endif
1190 
1191  // Note that the indices in DistSparseMatrix follow the FORTRAN (1-based) convention
1192  for( Int j = 0; j < numColLocal; j++ ){
1193  Int jcol = j + firstCol + 1;
1194  Int numRow = A.colptrLocal(j+1) - A.colptrLocal(j);
1195  const Int* rowPtr = &A.rowindLocal( A.colptrLocal(j) - 1 );
1196  // NOTE: The rows in DistSparseMatrix are not necessarily ordered.
1197  // So lower_bound cannot be used here for fast searching; std::find has to be used.
1198  const Int* ptr = std::find( rowPtr, rowPtr + numRow, jcol );
1199  if( ptr == rowPtr + numRow ){
1200  std::ostringstream msg;
1201  msg << "Serious problem. The diagonal entry of this column was not found." << std::endl
1202  << "This happens when j = " << j << ", jcol = " << jcol << ", and the row indices are " << std::endl
1203  << IntNumVec( numRow, false, const_cast<Int*>(rowPtr) ) << std::endl;
1204  ErrorHandling( msg.str().c_str() );
1205  }
1206  Int diagIdx = ptr - A.rowindLocal.Data();
1207  diagLocal( jcol - 1 ) = A.nzvalLocal( diagIdx );
1208  }
1209 
1210  mpi::Allreduce( &diagLocal[0], &diag[0], A.size, MPI_SUM, A.comm );
1211 
1212 
1213  return ;
1214 } // ----- end of function GetDiagonal -----
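// Illustrative usage sketch (assumes "AMat" is a DistSparseMatrix<Real>
// already distributed over AMat.comm):
//
//   NumVec<Real> diag;
//   GetDiagonal( AMat, diag );
//   // After the Allreduce every rank holds the full diagonal of AMat.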
1215 
1216 
1217 
1218 
1219 template <typename T> void CSCToCSR(DistSparseMatrix<T>& sparseA, DistSparseMatrix<T> & sparseB ){
1220 
1221 
1222  Int mpirank;
1223  MPI_Comm_rank(sparseA.comm,&mpirank);
1224 
1225  Int mpisize;
1226  MPI_Comm_size(sparseA.comm,&mpisize);
1227 
1228  Int numRowLocalFirst = sparseA.size / mpisize;
1229  Int firstRow = mpirank * numRowLocalFirst;
1230  Int numRowLocal = -1;
1231  Int nnzLocal = -1;
1232 
1233  sparseB.size = sparseA.size;
1234  sparseB.nnz = sparseA.nnz;
1235  sparseB.comm = sparseA.comm;
1236 
1237 
1238  LongInt nnz = 0;
1239  IntNumVec rowindGlobal;
1240  IntNumVec colptrGlobal;
1241  //TIMER_START(ToGlobalStructure);
1242  {
1243  colptrGlobal.Resize(sparseA.size+1);
1244 
1245  /* Allgatherv for row indices. */
1246  IntNumVec prevnz(mpisize);
1247  IntNumVec rcounts(mpisize);
1248  MPI_Allgather((void*)&sparseA.nnzLocal, 1, MPI_INT, rcounts.Data(), 1, MPI_INT, sparseA.comm);
1249 
1250  prevnz[0] = 0;
1251  for (Int i = 0; i < mpisize-1; ++i) { prevnz[i+1] = prevnz[i] + rcounts[i]; }
1252 
1253  nnz = 0;
1254  for (Int i = 0; i < mpisize; ++i) { nnz += rcounts[i]; }
1255  rowindGlobal.Resize(nnz);
1256 
1257  MPI_Allgatherv(sparseA.rowindLocal.Data(), sparseA.nnzLocal, MPI_INT, rowindGlobal.Data(),rcounts.Data(), prevnz.Data(), MPI_INT, sparseA.comm);
1258 
1259  /* Allgatherv for colptr */
1260  // Compute the number of columns on each processor
1261  Int numColFirst = std::max(1,sparseA.size / mpisize);
1262  SetValue( rcounts, numColFirst );
1263  rcounts[mpisize-1] = sparseA.size - numColFirst * (mpisize-1); // Modify the last entry
1264 
1265  IntNumVec rdispls(mpisize);
1266  rdispls[0] = 0;
1267  for (Int i = 0; i < mpisize-1; ++i) { rdispls[i+1] = rdispls[i] + rcounts[i]; }
1268 
1269  MPI_Allgatherv(sparseA.colptrLocal.Data(), sparseA.colptrLocal.m()-1, MPI_INT, colptrGlobal.Data(),
1270  rcounts.Data(), rdispls.Data(), MPI_INT, sparseA.comm);
1271 
1272  /* Recompute column pointers. */
1273  for (Int p = 1; p < mpisize; p++) {
1274  Int idx = rdispls[p];
1275  for (Int j = 0; j < rcounts[p]; ++j) colptrGlobal[idx++] += prevnz[p];
1276  }
1277  colptrGlobal(sparseA.size)= nnz+1;
1278  }
1279  //TIMER_STOP(ToGlobalStructure);
1280 
1281 
1282  IntNumVec rowptrGlobal;
1283  IntNumVec colindGlobal;
1284  //Compute Global CSR structure and Local CSR structure
1285  {
1286  Int i, j;
1287 
1288  colindGlobal.Resize(nnz);
1289  rowptrGlobal.Resize(sparseA.size+1);
1290  IntNumVec row_nnz(sparseA.size);
1291  SetValue(row_nnz,I_ZERO);
1292 
1293  for (i = 0; i < nnz; i++) {
1294  Int k = rowindGlobal[i];
1295  row_nnz[k-1]++;
1296  }
1297 
1298  rowptrGlobal[0]=1;
1299  for (i = 1; i <= sparseA.size; i++)
1300  {
1301  rowptrGlobal[i] = rowptrGlobal[i-1] + row_nnz[i-1];
1302  row_nnz[i-1] = 0;
1303  }
1304 
1305  /*
1306  * convert from CSC to CSR.
1307  * use row_nnz to keep track of the number of non-zeros
1308  * added to each row.
1309  */
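 /* Illustrative example: a 2x2 matrix with nonzeros at (1,1), (2,1) and
  * (2,2) has CSC structure colptr = {1,3,4}, rowind = {1,2,2}; the resulting
  * CSR structure is rowptr = {1,2,4} (computed above) and colind = {1,1,2}
  * (filled in by the loop below).
  */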
1310  for (j = 0; j < colptrGlobal.m()-1; j++)
1311  {
1312  Int k;
1313  Int nnz_col; /* # of non-zeros in column j of A */
1314 
1315  nnz_col = colptrGlobal[j+1] - colptrGlobal[j];
1316 
1317  for (k = colptrGlobal[j]; k < colptrGlobal[j+1]; k++)
1318  {
1319  Int i = rowindGlobal[ k - 1 ]; /* row index */
1320  Int h = rowptrGlobal[i-1] + row_nnz[i-1]; /* non-zero position */
1321 
1322  /* add the non-zero A(i,j) to B */
1323  colindGlobal[ h - 1 ] = j+1;
1324  row_nnz[i-1]++;
1325  }
1326  }
1327 
1328 
1329 
1330  }
1331 
1332  //This assertion is only ok for structurally symmetric matrices
1333  //for(int i=0;i<rowptrGlobal.m();++i){ assert(rowptrGlobal[i]==colptrGlobal[i]); }
1334 
1335  //for(int i=1;i<rowptrGlobal.m();++i){
1336  // Int colbeg = rowptrGlobal[i-1];
1337  // Int colend = rowptrGlobal[i]-1;
1338  // for(Int j=colbeg;j<=colend;++j){
1339  // Int row = colindGlobal[j-1];
1340 
1341  // Int * ptr = std::find(&rowindGlobal[colbeg-1],&rowindGlobal[colend-1]+1,row);
1342  // }
1343  //}
1344 
1345  //compute the local CSR structure
1346  std::vector<Int > numRowVec(mpisize,numRowLocalFirst);
1347  numRowVec[mpisize-1] = sparseA.size - (mpisize-1)*numRowLocalFirst;
1348 
1349  numRowLocal = numRowVec[mpirank];
1350  Int myFirstCol = mpirank*numRowLocalFirst+1;
1351  Int myLastCol = myFirstCol + numRowLocal -1;
1352 
1353  sparseB.colptrLocal.Resize(numRowLocal+1);
1354  Int * rowptrLocal = &sparseB.colptrLocal[0];
1355 
1356  //copy local chunk of rowptr
1357  std::copy(&rowptrGlobal[myFirstCol-1],
1358  &rowptrGlobal[myLastCol]+1,
1359  rowptrLocal);
1360 
1361  nnzLocal = rowptrLocal[numRowLocal] - rowptrLocal[0];
1362 
1363  sparseB.nnzLocal = nnzLocal;
1364 
1365  sparseB.rowindLocal.Resize(nnzLocal);
1366  Int * colindLocal = &sparseB.rowindLocal[0];
1367 
1368  sparseB.nzvalLocal.Resize(nnzLocal);
1369  T * nzvalLocal = &sparseB.nzvalLocal[0];
1370 
1371  //copy local chunk of colind
1372  std::copy(&colindGlobal[rowptrLocal[0]-1],
1373  &colindGlobal[rowptrLocal[0]-1]+nnzLocal,
1374  colindLocal);
1375 
1376  //convert rowptr to local references
1377  for( Int i = 1; i < numRowLocal + 1; i++ ){
1378  rowptrLocal[i] = rowptrLocal[i] - rowptrLocal[0] + 1;
1379  }
1380  rowptrLocal[0]=1;
1381 
1382  //for(int i=0;i<numRowLocal;++i){
1383  // Int col = i+myFirstCol;
1384  // Int colbeg = rowptrLocal[i];
1385  // Int colend = rowptrLocal[i+1]-1;
1386 
1387  // for(Int j=colbeg;j<=colend;++j){
1388  // Int row = colindLocal[j-1];
1389 
1390  // const Int * ptr = std::find(&sparseA.rowindLocal[colbeg-1],&sparseA.rowindLocal[colend-1]+1,row);
1391  // }
1392 
1393  //}
1394 
1395 
1396  {
1397  //Redistribute the data
1398  // per-row fill counters for the local CSR structure
1399  std::vector<Int> * row_nnz = new std::vector<Int>(numRowLocal,0);
1400 
1401  for(int p =0; p<mpisize;p++){
1402  std::vector< char > send_buffer;
1403  std::vector< char > recv_buffer;
1404  std::vector<int> displs(mpisize+1,0);
1405  std::vector<int> bufsizes(mpisize,0);
1406 
1407  // parse the global CSC structure to count how many bytes go to each destination process
1408  Int firstCol = p*numRowLocalFirst+1;
1409  Int lastCol = firstCol + numRowVec[p]-1;
1410  for(Int col = 1; col<colptrGlobal.m();++col){
1411  if(col>= firstCol && col<= lastCol){
1412  Int colbeg = colptrGlobal[col-1];
1413  Int colend = colptrGlobal[col]-1;
1414  for(Int i=colbeg;i<=colend;++i){
1415  Int row = rowindGlobal[i-1];
1416  // determine which destination process owns this row
1417  Int p_dest = std::min(mpisize-1,(row-1)/(numRowLocalFirst));
1418  bufsizes[p_dest]+=sizeof(T);
1419  }
1420  }
1421  else if(col>lastCol){
1422  break;
1423  }
1424  }
1425 
1426  //compute displacement (cum sum)
1427  std::partial_sum(bufsizes.begin(),bufsizes.end(),displs.begin()+1);
1428 
1429  if(mpirank==p){
1430  std::vector<int> bufpos(mpisize,0);
1431 
1432  //resize buffers
1433  Int send_buffer_size = displs[mpisize];
1434  send_buffer.resize(displs[mpisize]);
1435 
1436  //fill the buffers by parsing local CSC structure
1437  for(Int j = 1; j<sparseA.colptrLocal.m();++j){
1438  Int gCol = mpirank*numRowLocalFirst + j;
1439  Int colbeg = sparseA.colptrLocal[j-1];
1440  Int colend = sparseA.colptrLocal[j]-1;
1441  for(Int i=colbeg;i<=colend;++i){
1442  Int row = sparseA.rowindLocal[i-1];
1443  // determine which destination process owns this row
1444  Int p_dest = std::min(mpisize-1,(row-1)/(numRowLocalFirst));
1445  Int p_displs = displs[p_dest];
1446  Int p_bufpos = bufpos[p_dest];
1447  *((T *)&send_buffer.at( displs[p_dest] + bufpos[p_dest] )) = sparseA.nzvalLocal[i-1];
1448  //advance position
1449  bufpos[p_dest]+= sizeof(T);
1450  }
1451  }
1452  }
1453 
1454 
1455 
1456  Int recv_buffer_size = bufsizes[mpirank];
1457  recv_buffer.resize(bufsizes[mpirank]);
1458 
1459  //scatterv
1460  MPI_Scatterv(&send_buffer[0], &bufsizes[0], &displs[0], MPI_BYTE, &recv_buffer[0], bufsizes[mpirank], MPI_BYTE, p, sparseA.comm);
1461 
1462  //process data
1463  Int recv_pos = 0;
1464  // parse the global CSC structure again to unpack the received values
1465  for(Int col = 1; col<colptrGlobal.m();++col){
1466  if(col>= firstCol && col<= lastCol){
1467  Int colbeg = colptrGlobal[col-1];
1468  Int colend = colptrGlobal[col]-1;
1469  for(Int i=colbeg;i<=colend;++i){
1470  Int row = rowindGlobal[i-1];
1471  Int p_dest = std::min(mpisize-1,(row-1)/(numRowLocalFirst));
1472  if(p_dest==mpirank){
1473  //compute local CSR coordinates
1474  Int local_row = row - mpirank*numRowLocalFirst;
1475  Int h = rowptrLocal[local_row-1] + row_nnz->at(local_row-1); /* non-zero position */
1476  *((T*)&nzvalLocal[h-1]) = *((T*)&recv_buffer.at(recv_pos));
1477  //advance position in CSR buffer
1478  row_nnz->at(local_row-1)++;
1479  //advance position in CSC recv_buffer
1480  recv_pos+=sizeof(T);
1481  }
1482  }
1483  }
1484  else if(col>lastCol){
1485  break;
1486  }
1487  }
1488  }
1489  delete row_nnz;
1490  }
1491 }
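// Illustrative usage sketch (assumes "A" holds a distributed CSC matrix read
// by one of the routines above):
//
//   DistSparseMatrix<Real> Acsr;
//   CSCToCSR( A, Acsr );
//   // Acsr reuses the CSC field names: colptrLocal holds the local row
//   // pointers and rowindLocal holds the column indices of the CSR form.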
1492 
1493 
1494 
1495 
1496 }
1497 #endif //_PEXSI_UTILITY_IMPL_HPP_