PEXSI
utility_impl.hpp
1 /*
2  Copyright (c) 2012 The Regents of the University of California,
3  through Lawrence Berkeley National Laboratory.
4 
5  Author: Lin Lin and Mathias Jacquelin
6 
7  This file is part of PEXSI. All rights reserved.
8 
9  Redistribution and use in source and binary forms, with or without
10  modification, are permitted provided that the following conditions are met:
11 
12  (1) Redistributions of source code must retain the above copyright notice, this
13  list of conditions and the following disclaimer.
14  (2) Redistributions in binary form must reproduce the above copyright notice,
15  this list of conditions and the following disclaimer in the documentation
16  and/or other materials provided with the distribution.
17  (3) Neither the name of the University of California, Lawrence Berkeley
18  National Laboratory, U.S. Dept. of Energy nor the names of its contributors may
19  be used to endorse or promote products derived from this software without
20  specific prior written permission.
21 
22  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
23  ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
24  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
25  DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
26  ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
27  (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
29  ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 
33  You are under no obligation whatsoever to provide any bug fixes, patches, or
34  upgrades to the features, functionality or performance of the source code
35  ("Enhancements") to anyone; however, if you choose to make your Enhancements
36  available either publicly, or directly to Lawrence Berkeley National
37  Laboratory, without imposing a separate written license agreement for such
38  Enhancements, then you hereby grant the following license: a non-exclusive,
39  royalty-free perpetual license to install, use, modify, prepare derivative
40  works, incorporate into other computer software, distribute, and sublicense
41  such enhancements or derivative works thereof, in binary and source code form.
42 */
46 #ifndef _PEXSI_UTILITY_IMPL_HPP_
47 #define _PEXSI_UTILITY_IMPL_HPP_
48 
49 #include <numeric>
50 
51 using namespace std;
52 using std::ifstream;
53 using std::ofstream;
54 using std::vector;
55 using std::cerr;
56 
57 
58 namespace PEXSI{
59 
60 // *********************************************************************
61 // Sparse Matrix
62 // *********************************************************************
63 
64 //---------------------------------------------------------
65 inline void ReadSparseMatrix ( const char* filename, SparseMatrix<Real>& spmat )
66 {
67 #ifndef _RELEASE_
68  PushCallStack("ReadSparseMatrix");
69 #endif
70 
71  // FIXME
72  // Binary format
73  if( 1 ){
74  std::istringstream iss;
75  SharedRead( filename, iss );
76  deserialize( spmat.size, iss, NO_MASK );
77  deserialize( spmat.nnz, iss, NO_MASK );
78  deserialize( spmat.colptr, iss, NO_MASK );
79  deserialize( spmat.rowind, iss, NO_MASK );
80  deserialize( spmat.nzval, iss, NO_MASK );
81  }
82 
83  // Ascii format
84  if( 0 ) {
85  ifstream fin(filename);
86  fin >> spmat.size >> spmat.nnz;
87 
88  spmat.colptr.Resize( spmat.size+1 );
89  spmat.rowind.Resize( spmat.nnz );
90  spmat.nzval.Resize ( spmat.nnz );
91 
92  for( Int i = 0; i < spmat.size + 1; i++ ){
93  fin >> spmat.colptr(i);
94  }
95 
96  for( Int i = 0; i < spmat.nnz; i++ ){
97  fin >> spmat.rowind(i);
98  }
99 
100  for( Int i = 0; i < spmat.nnz; i++ ){
101  fin >> spmat.nzval(i);
102  }
103 
104  fin.close();
105  }
106 #ifndef _RELEASE_
107  PopCallStack();
108 #endif
109 
110  return ;
111 } // ----- end of function ReadSparseMatrix -----
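// A minimal usage sketch for the serial reader above. This is hypothetical driver
// code, not part of PEXSI; the file name is a placeholder and the file is assumed to
// be in the binary format produced by the matching serialize() calls.
inline void ExampleReadSparseMatrixUsage ()
{
  SparseMatrix<Real> H;
  ReadSparseMatrix( "H.csc", H );
  // H.size, H.nnz, H.colptr, H.rowind and H.nzval are now populated.
}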
112 
113 //---------------------------------------------------------
114 inline void ReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
115 {
116 #ifndef _RELEASE_
117  PushCallStack("ReadDistSparseMatrix");
118 #endif
119  // Get the processor information within the current communicator
120  MPI_Barrier( comm );
121  Int mpirank; MPI_Comm_rank(comm, &mpirank);
122  Int mpisize; MPI_Comm_size(comm, &mpisize);
123  MPI_Status mpistat;
124  std::ifstream fin;
125 
126  // FIXME Maybe change to MPI_Comm_dup.
127  pspmat.comm = comm;
128 
129  // Read basic information
130  if( mpirank == 0 ){
131  fin.open(filename);
132  if( !fin.good() ){
133  #ifdef USE_ABORT
134 abort();
135 #endif
136 throw std::logic_error( "File cannot be opened!" );
137  }
138  fin.read((char*)&pspmat.size, sizeof(Int));
139  fin.read((char*)&pspmat.nnz, sizeof(Int));
140  }
141 
142  // FIXME Maybe need LongInt format to read the number of nonzeros later
143 
144  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
145  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
146 
147  // Read colptr
148  IntNumVec colptr(pspmat.size+1);
149  if( mpirank == 0 ){
150  Int tmp;
151  fin.read((char*)&tmp, sizeof(Int));
152 
153  if( tmp != pspmat.size+1 ){
154  #ifdef USE_ABORT
155 abort();
156 #endif
157 throw std::logic_error( "colptr is not of the right size." );
158  }
159 
160  fin.read((char*)colptr.Data(), sizeof(Int)*tmp);
161 
162  }
163 
164  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
165 
166  // Compute the number of columns on each processor
167  IntNumVec numColLocalVec(mpisize);
168  Int numColLocal, numColFirst;
169  numColFirst = pspmat.size / mpisize;
170  SetValue( numColLocalVec, numColFirst );
171  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
172  numColLocal = numColLocalVec[mpirank];
173 
174  pspmat.colptrLocal.Resize( numColLocal + 1 );
175  for( Int i = 0; i < numColLocal + 1; i++ ){
176  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
177  }
178 
179  // Calculate nnz_loc on each processor
180  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
181 
182  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
183  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
184 
185  // Read and distribute the row indices
186  if( mpirank == 0 ){
187  Int tmp;
188  fin.read((char*)&tmp, sizeof(Int));
189 
190  if( tmp != pspmat.nnz ){
191  std::ostringstream msg;
192  msg
193  << "The number of nonzeros in row indices does not match." << std::endl
194  << "nnz = " << pspmat.nnz << std::endl
195  << "size of row indices = " << tmp << std::endl;
196  #ifdef USE_ABORT
197 abort();
198 #endif
199 throw std::logic_error( msg.str().c_str() );
200  }
201  IntNumVec buf;
202  Int numRead;
203  for( Int ip = 0; ip < mpisize; ip++ ){
204  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
205  colptr[ip*numColFirst];
206  buf.Resize(numRead);
207  fin.read( (char*)buf.Data(), numRead*sizeof(Int) );
208 
209  if( ip > 0 ){
210  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
211  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
212  }
213  else{
214  pspmat.rowindLocal = buf;
215  }
216  }
217  }
218  else{
219  Int numRead;
220  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
221  if( numRead != pspmat.nnzLocal ){
222  std::ostringstream msg;
223  msg << "The number of row indices received does not match." << std::endl
224  << "numRead = " << numRead << std::endl
225  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
226  #ifdef USE_ABORT
227 abort();
228 #endif
229 throw std::logic_error( msg.str().c_str() );
230  }
231 
232  pspmat.rowindLocal.Resize( numRead );
233  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
234  }
235 
236  // Read and distribute the nonzero values
237  if( mpirank == 0 ){
238  Int tmp;
239  fin.read((char*)&tmp, sizeof(Int));
240 
241  if( tmp != pspmat.nnz ){
242  std::ostringstream msg;
243  msg
244  << "The number of nonzeros in values does not match." << std::endl
245  << "nnz = " << pspmat.nnz << std::endl
246  << "size of values = " << tmp << std::endl;
247  #ifdef USE_ABORT
248 abort();
249 #endif
250 throw std::logic_error( msg.str().c_str() );
251  }
252  NumVec<Real> buf;
253  Int numRead;
254  for( Int ip = 0; ip < mpisize; ip++ ){
255  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
256  colptr[ip*numColFirst];
257  buf.Resize(numRead);
258  fin.read( (char*)buf.Data(), numRead*sizeof(Real) );
259 
260  if( ip > 0 ){
261  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
262  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
263  }
264  else{
265  pspmat.nzvalLocal = buf;
266  }
267  }
268  }
269  else{
270  Int numRead;
271  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
272  if( numRead != pspmat.nnzLocal ){
273  std::ostringstream msg;
274  msg << "The number of values received does not match." << std::endl
275  << "numRead = " << numRead << std::endl
276  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
277  #ifdef USE_ABORT
278 abort();
279 #endif
280 throw std::logic_error( msg.str().c_str() );
281  }
282 
283  pspmat.nzvalLocal.Resize( numRead );
284  MPI_Recv( pspmat.nzvalLocal.Data(), numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
285  }
286 
287  // Close the file
288  if( mpirank == 0 ){
289  fin.close();
290  }
291 
292 
293 
294  MPI_Barrier( comm );
295 
296 #ifndef _RELEASE_
297  PopCallStack();
298 #endif
299 
300  return ;
301 } // ----- end of function ReadDistSparseMatrix -----
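// The reader above expects a binary file laid out as raw Int / Real words:
// size, nnz, then (size+1) followed by the colptr array, nnz followed by the rowind
// array, and nnz followed by the nzval array. A serial writer matching that layout
// might look as follows (hypothetical helper, not part of PEXSI):
inline void WriteSparseMatrixBinary ( const char* filename, Int size, Int nnz,
    const Int* colptr, const Int* rowind, const Real* nzval )
{
  std::ofstream fout( filename, std::ios::binary );
  Int np1 = size + 1;
  fout.write( reinterpret_cast<const char*>( &size  ), sizeof(Int) );
  fout.write( reinterpret_cast<const char*>( &nnz   ), sizeof(Int) );
  fout.write( reinterpret_cast<const char*>( &np1   ), sizeof(Int) );       // length of colptr
  fout.write( reinterpret_cast<const char*>( colptr ), np1 * sizeof(Int) );
  fout.write( reinterpret_cast<const char*>( &nnz   ), sizeof(Int) );       // length of rowind
  fout.write( reinterpret_cast<const char*>( rowind ), nnz * sizeof(Int) );
  fout.write( reinterpret_cast<const char*>( &nnz   ), sizeof(Int) );       // length of nzval
  fout.write( reinterpret_cast<const char*>( nzval  ), nnz * sizeof(Real) );
}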
302 
303 inline void ParaWriteDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
304 {
305 #ifndef _RELEASE_
306  PushCallStack("ParaWriteDistSparseMatrix");
307 #endif
308  // Get the processor information within the current communicator
309  MPI_Barrier( comm );
310  Int mpirank; MPI_Comm_rank(comm, &mpirank);
311  Int mpisize; MPI_Comm_size(comm, &mpisize);
312  MPI_Status mpistat;
313  Int err = 0;
314 
315 
316 
317  int filemode = MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN;
318 
319  MPI_File fout;
320  MPI_Status status;
321 
322 
323 
324  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fout);
325 
326  if (err != MPI_SUCCESS) {
327  #ifdef USE_ABORT
328 abort();
329 #endif
330 throw std::logic_error( "File cannot be opened!" );
331  }
332 
333  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
334  // Write header
335  if( mpirank == 0 ){
336  err = MPI_File_write_at(fout, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
337  err = MPI_File_write_at(fout, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
338  }
339 
340 
341  // Compute the number of columns on each processor
342  Int numColLocal = pspmat.colptrLocal.m()-1;
343  Int numColFirst = pspmat.size / mpisize;
344  IntNumVec colptrChunk(numColLocal+1);
345 
346  Int prev_nz = 0;
347  MPI_Exscan(&pspmat.nnzLocal, &prev_nz, 1, MPI_INT, MPI_SUM, comm);
348 
349  for( Int i = 0; i < numColLocal + 1; i++ ){
350  colptrChunk[i] = pspmat.colptrLocal[i] + prev_nz;
351  }
352 
353 
354  MPI_Datatype memtype, filetype;
355  MPI_Aint disps[6];
356  int blklens[6];
357  MPI_Datatype types[6] = {MPI_INT,MPI_INT, MPI_INT,MPI_INT, MPI_INT,MPI_DOUBLE};
358 
359  /* set block lengths (same for both types) */
360  blklens[0] = (mpirank==0)?1:0;
361  blklens[1] = numColLocal+1;
362  blklens[2] = (mpirank==0)?1:0;
363  blklens[3] = pspmat.nnzLocal;
364  blklens[4] = (mpirank==0)?1:0;
365  blklens[5] = pspmat.nnzLocal;
366 
367 
368 
369 
370  //Calculate offsets
371  MPI_Offset myColPtrOffset, myRowIdxOffset, myNzValOffset;
372  myColPtrOffset = 3*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
373  myRowIdxOffset = 3*sizeof(int) + (pspmat.size +1 + prev_nz)*sizeof(Int);
374  myNzValOffset = 4*sizeof(int) + (pspmat.size +1 + pspmat.nnz)*sizeof(Int)+ prev_nz*sizeof(Real);
375  disps[0] = 2*sizeof(int);
376  disps[1] = myColPtrOffset;
377  disps[2] = myRowIdxOffset;
378  disps[3] = sizeof(int)+myRowIdxOffset;
379  disps[4] = myNzValOffset;
380  disps[5] = sizeof(int)+myNzValOffset;
381 
382 
383 
384 #if ( _DEBUGlevel_ >= 1 )
385  char msg[200];
386  char * tmp = msg;
387  tmp += sprintf(tmp,"P%d ",mpirank);
388  for(int i = 0; i<6; ++i){
389  if(i==5)
390  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(double));
391  else
392  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(int));
393  }
394  tmp += sprintf(tmp,"\n");
395  printf("%s",msg);
396 #endif
397 
398 
399 
400 
401  MPI_Type_create_struct(6, blklens, disps, types, &filetype);
402  MPI_Type_commit(&filetype);
403 
404  /* create memory type */
405  Int np1 = pspmat.size+1;
406  MPI_Address( (void *)&np1, &disps[0]);
407  MPI_Address(colptrChunk.Data(), &disps[1]);
408  MPI_Address( (void *)&pspmat.nnz, &disps[2]);
409  MPI_Address((void *)pspmat.rowindLocal.Data(), &disps[3]);
410  MPI_Address( (void *)&pspmat.nnz, &disps[4]);
411  MPI_Address((void *)pspmat.nzvalLocal.Data(), &disps[5]);
412 
413  MPI_Type_create_struct(6, blklens, disps, types, &memtype);
414  MPI_Type_commit(&memtype);
415 
416 
417 
418  /* set file view */
419  err = MPI_File_set_view(fout, 0, MPI_BYTE, filetype, "native",MPI_INFO_NULL);
420 
421  /* everyone writes their own row offsets, columns, and
422  * data with one big noncontiguous write (in memory and
423  * file)
424  */
425  err = MPI_File_write_all(fout, MPI_BOTTOM, 1, memtype, &status);
426 
427  MPI_Type_free(&filetype);
428  MPI_Type_free(&memtype);
429 
430 
431 
432 
433 
434  MPI_Barrier( comm );
435 
436  MPI_File_close(&fout);
437 #ifndef _RELEASE_
438  PopCallStack();
439 #endif
440 
441  return ;
442 } // ----- end of function ParaWriteDistSparseMatrix -----
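// The writer above and the readers in this file share the same 1D block-column
// partition: each rank owns size/mpisize consecutive columns, and the last rank also
// absorbs the remainder. A small sketch of the ownership rule (hypothetical helper,
// not part of the PEXSI API; globalCol is 0-based and size >= mpisize is assumed, as
// in the routines above):
inline Int BlockColumnOwner ( Int globalCol, Int size, Int mpisize )
{
  Int numColFirst = size / mpisize;
  Int owner = globalCol / numColFirst;
  if( owner > mpisize - 1 ) owner = mpisize - 1;  // remainder columns live on the last rank
  return owner;
}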
443 
444 inline void ParaReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
445 {
446 #ifndef _RELEASE_
447  PushCallStack("ParaReadDistSparseMatrix");
448 #endif
449  // Get the processor information within the current communicator
450  MPI_Barrier( comm );
451  Int mpirank; MPI_Comm_rank(comm, &mpirank);
452  Int mpisize; MPI_Comm_size(comm, &mpisize);
453  MPI_Status mpistat;
454  MPI_Datatype type;
455  int lens[3];
456  MPI_Aint disps[3];
457  MPI_Datatype types[3];
458  Int err = 0;
459 
460  int filemode = MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN;
461 
462  MPI_File fin;
463  MPI_Status status;
464 
465  // FIXME Maybe change to MPI_Comm_dup.
466  pspmat.comm = comm;
467 
468  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fin);
469 
470  if (err != MPI_SUCCESS) {
471  #ifdef USE_ABORT
472 abort();
473 #endif
474 throw std::logic_error( "File cannot be opened!" );
475  }
476 
477  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
478  // Read header
479  if( mpirank == 0 ){
480  err = MPI_File_read_at(fin, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
481  err = MPI_File_read_at(fin, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
482  }
483 
484 
485  /* define a struct that describes all our data */
486  lens[0] = 1;
487  lens[1] = 1;
488  MPI_Address(&pspmat.size, &disps[0]);
489  MPI_Address(&pspmat.nnz, &disps[1]);
490  types[0] = MPI_INT;
491  types[1] = MPI_INT;
492  MPI_Type_struct(2, lens, disps, types, &type);
493  MPI_Type_commit(&type);
494 
495 
496  /* broadcast the header data to everyone */
497  MPI_Bcast(MPI_BOTTOM, 1, type, 0, comm);
498 
499  MPI_Type_free(&type);
500 
501  // Compute the number of columns on each processor
502  IntNumVec numColLocalVec(mpisize);
503  Int numColLocal, numColFirst;
504  numColFirst = pspmat.size / mpisize;
505  SetValue( numColLocalVec, numColFirst );
506  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
507  numColLocal = numColLocalVec[mpirank];
508  pspmat.colptrLocal.Resize( numColLocal + 1 );
509 
510 
511 
512  MPI_Offset myColPtrOffset = (2 + ((mpirank==0)?0:1) )*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
513 
514  Int np1 = 0;
515  lens[0] = (mpirank==0)?1:0;
516  lens[1] = numColLocal + 1;
517 
518  MPI_Address(&np1, &disps[0]);
519  MPI_Address(pspmat.colptrLocal.Data(), &disps[1]);
520 
521  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
522  MPI_Type_commit(&type);
523 
524  err= MPI_File_read_at_all(fin, myColPtrOffset, MPI_BOTTOM, 1, type, &status);
525 
526  if (err != MPI_SUCCESS) {
527 #ifdef USE_ABORT
528  abort();
529 #endif
530  throw std::logic_error( "error reading colptr" );
531  }
532  MPI_Type_free(&type);
533 
534  // Calculate nnz_loc on each processor
535  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
536 
537 
538  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
539  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
540 
541  //read rowIdx
542  MPI_Offset myRowIdxOffset = (3 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + (pspmat.colptrLocal[0]-1))*sizeof(int);
543 
544  lens[0] = (mpirank==0)?1:0;
545  lens[1] = pspmat.nnzLocal;
546 
547  MPI_Address(&np1, &disps[0]);
548  MPI_Address(pspmat.rowindLocal.Data(), &disps[1]);
549 
550  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
551  MPI_Type_commit(&type);
552 
553  err= MPI_File_read_at_all(fin, myRowIdxOffset, MPI_BOTTOM, 1, type,&status);
554 
555  if (err != MPI_SUCCESS) {
556  #ifdef USE_ABORT
557 abort();
558 #endif
559 throw std::logic_error( "error reading rowind" );
560  }
561  MPI_Type_free(&type);
562 
563 
564  //read nzval
565  MPI_Offset myNzValOffset = (4 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(Int) + (pspmat.colptrLocal[0]-1)*sizeof(double);
566 
567  lens[0] = (mpirank==0)?1:0;
568  lens[1] = pspmat.nnzLocal;
569 
570  MPI_Address(&np1, &disps[0]);
571  MPI_Address(pspmat.nzvalLocal.Data(), &disps[1]);
572 
573  types[0] = MPI_INT;
574  types[1] = MPI_DOUBLE;
575 
576  MPI_Type_create_struct(2, lens, disps, types, &type);
577  MPI_Type_commit(&type);
578 
579  err = MPI_File_read_at_all(fin, myNzValOffset, MPI_BOTTOM, 1, type,&status);
580 
581  if (err != MPI_SUCCESS) {
582  #ifdef USE_ABORT
583 abort();
584 #endif
585 throw std::logic_error( "error reading nzval" );
586  }
587 
588  MPI_Type_free(&type);
589 
590 
591  //convert to local references
592  for( Int i = 1; i < numColLocal + 1; i++ ){
593  pspmat.colptrLocal[i] = pspmat.colptrLocal[i] - pspmat.colptrLocal[0] + 1;
594  }
595  pspmat.colptrLocal[0]=1;
596 
597  MPI_Barrier( comm );
598 
599  MPI_File_close(&fin);
600 #ifndef _RELEASE_
601  PopCallStack();
602 #endif
603 
604  return ;
605 } // ----- end of function ParaReadDistSparseMatrix -----
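// A hypothetical round-trip sketch for the parallel writer/reader pair above. The
// path "A.bin" is a placeholder; A is assumed to be an already distributed
// DistSparseMatrix<Real> living on the communicator comm.
inline void ExampleParaReadWriteRoundTrip ( DistSparseMatrix<Real>& A, MPI_Comm comm )
{
  ParaWriteDistSparseMatrix( "A.bin", A, comm );  // collective MPI-IO write
  DistSparseMatrix<Real> B;
  ParaReadDistSparseMatrix ( "A.bin", B, comm );  // collective MPI-IO read of the same layout
  // On return B carries the same block-column distribution and, per the conversion at
  // the end of ParaReadDistSparseMatrix, B.colptrLocal is expressed in local 1-based form.
}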
606 
607 inline void ReadDistSparseMatrixFormatted ( const char* filename, DistSparseMatrix<Real>& pspmat, MPI_Comm comm )
608 {
609 #ifndef _RELEASE_
610  PushCallStack("ReadDistSparseMatrixFormatted");
611 #endif
612  // Get the processor information within the current communicator
613  MPI_Barrier( comm );
614  Int mpirank; MPI_Comm_rank(comm, &mpirank);
615  Int mpisize; MPI_Comm_size(comm, &mpisize);
616  MPI_Status mpistat;
617  std::ifstream fin;
618 
619  // FIXME Maybe change to MPI_Comm_dup.
620  pspmat.comm = comm;
621 
622  // FIXME Maybe need LongInt format to read the number of nonzeros later
623 
624  // Read basic information
625  if( mpirank == 0 ){
626  fin.open(filename);
627  if( !fin.good() ){
628  #ifdef USE_ABORT
629 abort();
630 #endif
631 throw std::logic_error( "File cannot be opened!" );
632  }
633  Int dummy;
634 
635  fin >> pspmat.size >> dummy ;
636  fin >> pspmat.nnz >> dummy;
637  // FIXME this is temporary and only applies to 4*4 matrix.
638 // fin >> dummy;
639  }
640 
641  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
642  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
643 
644  // Read colptr
645 
646  IntNumVec colptr(pspmat.size+1);
647  if( mpirank == 0 ){
648  Int* ptr = colptr.Data();
649  for( Int i = 0; i < pspmat.size+1; i++ )
650  fin >> *(ptr++);
651  }
652 
653  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
654 
655  // Compute the number of columns on each processor
656  IntNumVec numColLocalVec(mpisize);
657  Int numColLocal, numColFirst;
658  numColFirst = pspmat.size / mpisize;
659  SetValue( numColLocalVec, numColFirst );
660  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
661  numColLocal = numColLocalVec[mpirank];
662 
663  pspmat.colptrLocal.Resize( numColLocal + 1 );
664  for( Int i = 0; i < numColLocal + 1; i++ ){
665  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
666  }
667 
668  // Calculate nnz_loc on each processor
669  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
670 
671  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
672  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
673 
674  // Read and distribute the row indices
675  if( mpirank == 0 ){
676  Int tmp;
677  IntNumVec buf;
678  Int numRead;
679  for( Int ip = 0; ip < mpisize; ip++ ){
680  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
681  colptr[ip*numColFirst];
682  buf.Resize(numRead);
683  Int *ptr = buf.Data();
684  for( Int i = 0; i < numRead; i++ ){
685  fin >> *(ptr++);
686  }
687  if( ip > 0 ){
688  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
689  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
690  }
691  else{
692  pspmat.rowindLocal = buf;
693  }
694  }
695  }
696  else{
697  Int numRead;
698  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
699  if( numRead != pspmat.nnzLocal ){
700  std::ostringstream msg;
701  msg << "The number of row indices received does not match." << std::endl
702  << "numRead = " << numRead << std::endl
703  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
704  #ifdef USE_ABORT
705 abort();
706 #endif
707 throw std::logic_error( msg.str().c_str() );
708  }
709 
710  pspmat.rowindLocal.Resize( numRead );
711  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
712  }
713 
714 // std::cout << "Proc " << mpirank << " outputs rowindLocal.size() = "
715 // << pspmat.rowindLocal.m() << endl;
716 
717 
718  // Read and distribute the nonzero values
719  if( mpirank == 0 ){
720  Int tmp;
721  NumVec<Real> buf;
722  Int numRead;
723  for( Int ip = 0; ip < mpisize; ip++ ){
724  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
725  colptr[ip*numColFirst];
726  buf.Resize(numRead);
727  Real *ptr = buf.Data();
728  for( Int i = 0; i < numRead; i++ ){
729  fin >> *(ptr++);
730  }
731  if( ip > 0 ){
732  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
733  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
734  }
735  else{
736  pspmat.nzvalLocal = buf;
737  }
738  }
739  }
740  else{
741  Int numRead;
742  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
743  if( numRead != pspmat.nnzLocal ){
744  std::ostringstream msg;
745  msg << "The number of values received does not match." << std::endl
746  << "numRead = " << numRead << std::endl
747  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
748  #ifdef USE_ABORT
749 abort();
750 #endif
751 throw std::logic_error( msg.str().c_str() );
752  }
753 
754  pspmat.nzvalLocal.Resize( numRead );
755  MPI_Recv( pspmat.nzvalLocal.Data(), numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
756  }
757 
758  // Close the file
759  if( mpirank == 0 ){
760  fin.close();
761  }
762 
763 
764 
765  MPI_Barrier( comm );
766 
767 #ifndef _RELEASE_
768  PopCallStack();
769 #endif
770 
771  return ;
772 } // ----- end of function ReadDistSparseMatrixFormatted -----
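// The formatted reader above consumes whitespace-separated text: size and a dummy
// value, nnz and a dummy value, then the size+1 colptr entries, the nnz row indices,
// and the nnz values. A sketch writing such a file for a hypothetical 2 x 2 example
// matrix [[4, 1], [1, 3]] in 1-based CSC (illustration only, not part of PEXSI):
inline void WriteFormattedExample ( const char* filename )
{
  std::ofstream f( filename );
  f << 2 << " " << 2 << std::endl;        // size and a dummy value (ignored on read)
  f << 4 << " " << 4 << std::endl;        // nnz  and a dummy value (ignored on read)
  f << "1 3 5"           << std::endl;    // colptr : size+1 entries
  f << "1 2 1 2"         << std::endl;    // rowind : nnz entries
  f << "4.0 1.0 1.0 3.0" << std::endl;    // nzval  : nnz entries
}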
773 
774 
775 inline void ParaReadDistSparseMatrix ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
776 {
777 #ifndef _RELEASE_
778  PushCallStack("ParaReadDistSparseMatrix");
779 #endif
780  // Get the processor information within the current communicator
781  MPI_Barrier( comm );
782  Int mpirank; MPI_Comm_rank(comm, &mpirank);
783  Int mpisize; MPI_Comm_size(comm, &mpisize);
784  MPI_Status mpistat;
785  MPI_Datatype type;
786  int lens[3];
787  MPI_Aint disps[3];
788  MPI_Datatype types[3];
789  Int err = 0;
790 
791  int filemode = MPI_MODE_RDONLY | MPI_MODE_UNIQUE_OPEN;
792 
793  MPI_File fin;
794  MPI_Status status;
795 
796  // FIXME Maybe change to MPI_Comm_dup.
797  pspmat.comm = comm;
798 
799  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fin);
800 
801  if (err != MPI_SUCCESS) {
802 #ifdef USE_ABORT
803  abort();
804 #endif
805  throw std::logic_error( "File cannot be opened!" );
806  }
807 
808  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
809  // Read header
810  if( mpirank == 0 ){
811  err = MPI_File_read_at(fin, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
812  err = MPI_File_read_at(fin, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
813  }
814 
815 
816  /* define a struct that describes all our data */
817  lens[0] = 1;
818  lens[1] = 1;
819  MPI_Address(&pspmat.size, &disps[0]);
820  MPI_Address(&pspmat.nnz, &disps[1]);
821  types[0] = MPI_INT;
822  types[1] = MPI_INT;
823  MPI_Type_struct(2, lens, disps, types, &type);
824  MPI_Type_commit(&type);
825 
826 
827  /* broadcast the header data to everyone */
828  MPI_Bcast(MPI_BOTTOM, 1, type, 0, comm);
829 
830  MPI_Type_free(&type);
831 
832  // Compute the number of columns on each processor
833  IntNumVec numColLocalVec(mpisize);
834  Int numColLocal, numColFirst;
835  numColFirst = pspmat.size / mpisize;
836  SetValue( numColLocalVec, numColFirst );
837  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
838  numColLocal = numColLocalVec[mpirank];
839  pspmat.colptrLocal.Resize( numColLocal + 1 );
840 
841 
842 
843  MPI_Offset myColPtrOffset = (2 + ((mpirank==0)?0:1) )*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
844 
845  Int np1 = 0;
846  lens[0] = (mpirank==0)?1:0;
847  lens[1] = numColLocal + 1;
848 
849  MPI_Address(&np1, &disps[0]);
850  MPI_Address(pspmat.colptrLocal.Data(), &disps[1]);
851 
852  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
853  MPI_Type_commit(&type);
854 
855  err= MPI_File_read_at_all(fin, myColPtrOffset, MPI_BOTTOM, 1, type, &status);
856 
857  if (err != MPI_SUCCESS) {
858 #ifdef USE_ABORT
859  abort();
860 #endif
861  throw std::logic_error( "error reading colptr" );
862  }
863  MPI_Type_free(&type);
864 
865  // Calculate nnz_loc on each processor
866  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
867 
868 
869  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
870  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
871 
872  //read rowIdx
873  MPI_Offset myRowIdxOffset = (3 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.colptrLocal[0]-1)*sizeof(Int);
874 
875  lens[0] = (mpirank==0)?1:0;
876  lens[1] = pspmat.nnzLocal;
877 
878  MPI_Address(&np1, &disps[0]);
879  MPI_Address(pspmat.rowindLocal.Data(), &disps[1]);
880 
881  MPI_Type_hindexed(2, lens, disps, MPI_INT, &type);
882  MPI_Type_commit(&type);
883 
884  err= MPI_File_read_at_all(fin, myRowIdxOffset, MPI_BOTTOM, 1, type,&status);
885 
886  if (err != MPI_SUCCESS) {
887 #ifdef USE_ABORT
888  abort();
889 #endif
890  throw std::logic_error( "error reading rowind" );
891  }
892  MPI_Type_free(&type);
893 
894 
895  //read nzval
896  MPI_Offset myNzValOffset = (4 + ((mpirank==0)?0:1) )*sizeof(int) + (pspmat.size+1 + pspmat.nnz)*sizeof(int) + (pspmat.colptrLocal[0]-1)*sizeof(Complex);
897 
898  lens[0] = (mpirank==0)?1:0;
899  lens[1] = pspmat.nnzLocal;
900 
901  MPI_Address(&np1, &disps[0]);
902  MPI_Address(pspmat.nzvalLocal.Data(), &disps[1]);
903 
904  types[0] = MPI_INT;
905  types[1] = MPI_DOUBLE_COMPLEX;
906 
907  MPI_Type_create_struct(2, lens, disps, types, &type);
908  MPI_Type_commit(&type);
909 
910  err = MPI_File_read_at_all(fin, myNzValOffset, MPI_BOTTOM, 1, type,&status);
911 
912  if (err != MPI_SUCCESS) {
913 #ifdef USE_ABORT
914  abort();
915 #endif
916  throw std::logic_error( "error reading nzval" );
917  }
918 
919  MPI_Type_free(&type);
920 
921 
922  //convert to local references
923  for( Int i = 1; i < numColLocal + 1; i++ ){
924  pspmat.colptrLocal[i] = pspmat.colptrLocal[i] - pspmat.colptrLocal[0] + 1;
925  }
926  pspmat.colptrLocal[0]=1;
927 
928  MPI_Barrier( comm );
929 
930  MPI_File_close(&fin);
931 #ifndef _RELEASE_
932  PopCallStack();
933 #endif
934 
935  return ;
936 } // ----- end of function ParaReadDistSparseMatrix -----
937 
938 inline void ParaWriteDistSparseMatrix ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
939 {
940 #ifndef _RELEASE_
941  PushCallStack("ParaWriteDistSparseMatrix");
942 #endif
943  // Get the processor information within the current communicator
944  MPI_Barrier( comm );
945  Int mpirank; MPI_Comm_rank(comm, &mpirank);
946  Int mpisize; MPI_Comm_size(comm, &mpisize);
947  MPI_Status mpistat;
948  Int err = 0;
949 
950 
951 
952  int filemode = MPI_MODE_WRONLY | MPI_MODE_CREATE | MPI_MODE_UNIQUE_OPEN;
953 
954  MPI_File fout;
955  MPI_Status status;
956 
957 
958 
959  err = MPI_File_open(comm,(char*) filename, filemode, MPI_INFO_NULL, &fout);
960 
961  if (err != MPI_SUCCESS) {
962  #ifdef USE_ABORT
963 abort();
964 #endif
965 throw std::logic_error( "File cannot be opened!" );
966  }
967 
968  // FIXME Note that nnz uses the Int data type for consistency of writing / reading
969  // Write header
970  if( mpirank == 0 ){
971  err = MPI_File_write_at(fout, 0,(char*)&pspmat.size, 1, MPI_INT, &status);
972  err = MPI_File_write_at(fout, sizeof(Int),(char*)&pspmat.nnz, 1, MPI_INT, &status);
973  }
974 
975 
976  // Compute the number of columns on each processor
977  Int numColLocal = pspmat.colptrLocal.m()-1;
978  Int numColFirst = pspmat.size / mpisize;
979  IntNumVec colptrChunk(numColLocal+1);
980 
981  Int prev_nz = 0;
982  MPI_Exscan(&pspmat.nnzLocal, &prev_nz, 1, MPI_INT, MPI_SUM, comm);
983 
984  for( Int i = 0; i < numColLocal + 1; i++ ){
985  colptrChunk[i] = pspmat.colptrLocal[i] + prev_nz;
986  }
987 
988 
989  MPI_Datatype memtype, filetype;
990  MPI_Aint disps[6];
991  int blklens[6];
992  MPI_Datatype types[6] = {MPI_INT,MPI_INT, MPI_INT,MPI_INT, MPI_INT,MPI_DOUBLE_COMPLEX};
993 
994  /* set block lengths (same for both types) */
995  blklens[0] = (mpirank==0)?1:0;
996  blklens[1] = numColLocal+1;
997  blklens[2] = (mpirank==0)?1:0;
998  blklens[3] = pspmat.nnzLocal;
999  blklens[4] = (mpirank==0)?1:0;
1000  blklens[5] = pspmat.nnzLocal;
1001 
1002 
1003 
1004 
1005  //Calculate offsets
1006  MPI_Offset myColPtrOffset, myRowIdxOffset, myNzValOffset;
1007  myColPtrOffset = 3*sizeof(int) + (mpirank*numColFirst)*sizeof(Int);
1008  myRowIdxOffset = 3*sizeof(int) + (pspmat.size +1 + prev_nz)*sizeof(Int);
1009  myNzValOffset = 4*sizeof(int) + (pspmat.size +1 + pspmat.nnz)*sizeof(Int)+ prev_nz*sizeof(Complex);
1010  disps[0] = 2*sizeof(int);
1011  disps[1] = myColPtrOffset;
1012  disps[2] = myRowIdxOffset;
1013  disps[3] = sizeof(int)+myRowIdxOffset;
1014  disps[4] = myNzValOffset;
1015  disps[5] = sizeof(int)+myNzValOffset;
1016 
1017 
1018 
1019 #if ( _DEBUGlevel_ >= 1 )
1020  char msg[200];
1021  char * tmp = msg;
1022  tmp += sprintf(tmp,"P%d ",mpirank);
1023  for(int i = 0; i<6; ++i){
1024  if(i==5)
1025  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(Complex));
1026  else
1027  tmp += sprintf(tmp, "%d [%d - %d] | ",i,disps[i],disps[i]+blklens[i]*sizeof(int));
1028  }
1029  tmp += sprintf(tmp,"\n");
1030  printf("%s",msg);
1031 #endif
1032 
1033 
1034 
1035 
1036  MPI_Type_create_struct(6, blklens, disps, types, &filetype);
1037  MPI_Type_commit(&filetype);
1038 
1039  /* create memory type */
1040  Int np1 = pspmat.size+1;
1041  MPI_Address( (void *)&np1, &disps[0]);
1042  MPI_Address(colptrChunk.Data(), &disps[1]);
1043  MPI_Address( (void *)&pspmat.nnz, &disps[2]);
1044  MPI_Address((void *)pspmat.rowindLocal.Data(), &disps[3]);
1045  MPI_Address( (void *)&pspmat.nnz, &disps[4]);
1046  MPI_Address((void *)pspmat.nzvalLocal.Data(), &disps[5]);
1047 
1048  MPI_Type_create_struct(6, blklens, disps, types, &memtype);
1049  MPI_Type_commit(&memtype);
1050 
1051 
1052 
1053  /* set file view */
1054  err = MPI_File_set_view(fout, 0, MPI_BYTE, filetype, "native",MPI_INFO_NULL);
1055 
1056  /* everyone writes their own row offsets, columns, and
1057  * data with one big noncontiguous write (in memory and
1058  * file)
1059  */
1060  err = MPI_File_write_all(fout, MPI_BOTTOM, 1, memtype, &status);
1061 
1062  MPI_Type_free(&filetype);
1063  MPI_Type_free(&memtype);
1064 
1065 
1066 
1067 
1068 
1069  MPI_Barrier( comm );
1070 
1071  MPI_File_close(&fout);
1072 #ifndef _RELEASE_
1073  PopCallStack();
1074 #endif
1075 
1076  return ;
1077 } // ----- end of function ParaWriteDistSparseMatrix -----
1078 
1079 
1080 
1081 inline void ReadDistSparseMatrixFormatted ( const char* filename, DistSparseMatrix<Complex>& pspmat, MPI_Comm comm )
1082 {
1083 #ifndef _RELEASE_
1084  PushCallStack("ReadDistSparseMatrixFormatted");
1085 #endif
1086  // Get the processor information within the current communicator
1087  MPI_Barrier( comm );
1088  Int mpirank; MPI_Comm_rank(comm, &mpirank);
1089  Int mpisize; MPI_Comm_size(comm, &mpisize);
1090  MPI_Status mpistat;
1091  std::ifstream fin;
1092 
1093  // FIXME Maybe change to MPI_Comm_dup.
1094  pspmat.comm = comm;
1095 
1096  // FIXME Maybe need LongInt format to read the number of nonzeros later
1097 
1098  // Read basic information
1099  if( mpirank == 0 ){
1100  fin.open(filename);
1101  if( !fin.good() ){
1102  #ifdef USE_ABORT
1103 abort();
1104 #endif
1105 throw std::logic_error( "File cannot be opened!" );
1106  }
1107  Int dummy;
1108 
1109  fin >> pspmat.size >> dummy;
1110  fin >> pspmat.nnz >> dummy;
1111  // FIXME this is temporary and only applies to 4*4 matrix.
1112 // fin >> dummy;
1113  }
1114 
1115  MPI_Bcast(&pspmat.size, 1, MPI_INT, 0, comm);
1116  MPI_Bcast(&pspmat.nnz, 1, MPI_INT, 0, comm);
1117 
1118  // Read colptr
1119 
1120  IntNumVec colptr(pspmat.size+1);
1121  if( mpirank == 0 ){
1122  Int* ptr = colptr.Data();
1123  for( Int i = 0; i < pspmat.size+1; i++ )
1124  fin >> *(ptr++);
1125  }
1126 
1127  MPI_Bcast(colptr.Data(), pspmat.size+1, MPI_INT, 0, comm);
1128 
1129  // Compute the number of columns on each processor
1130  IntNumVec numColLocalVec(mpisize);
1131  Int numColLocal, numColFirst;
1132  numColFirst = pspmat.size / mpisize;
1133  SetValue( numColLocalVec, numColFirst );
1134  numColLocalVec[mpisize-1] = pspmat.size - numColFirst * (mpisize-1); // Modify the last entry
1135  numColLocal = numColLocalVec[mpirank];
1136 
1137  pspmat.colptrLocal.Resize( numColLocal + 1 );
1138  for( Int i = 0; i < numColLocal + 1; i++ ){
1139  pspmat.colptrLocal[i] = colptr[mpirank * numColFirst+i] - colptr[mpirank * numColFirst] + 1;
1140  }
1141 
1142  // Calculate nnz_loc on each processor
1143  pspmat.nnzLocal = pspmat.colptrLocal[numColLocal] - pspmat.colptrLocal[0];
1144 
1145  pspmat.rowindLocal.Resize( pspmat.nnzLocal );
1146  pspmat.nzvalLocal.Resize ( pspmat.nnzLocal );
1147 
1148  // Read and distribute the row indices
1149  if( mpirank == 0 ){
1150  Int tmp;
1151  IntNumVec buf;
1152  Int numRead;
1153  for( Int ip = 0; ip < mpisize; ip++ ){
1154  numRead = colptr[ip*numColFirst + numColLocalVec[ip]] -
1155  colptr[ip*numColFirst];
1156 
1157 
1158  buf.Resize(numRead);
1159  Int *ptr = buf.Data();
1160  for( Int i = 0; i < numRead; i++ ){
1161  fin >> *(ptr++);
1162  }
1163  if( ip > 0 ){
1164  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
1165  MPI_Send(buf.Data(), numRead, MPI_INT, ip, 1, comm);
1166  }
1167  else{
1168  pspmat.rowindLocal = buf;
1169  }
1170  }
1171  }
1172  else{
1173  Int numRead;
1174  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
1175  if( numRead != pspmat.nnzLocal ){
1176  std::ostringstream msg;
1177  msg << "The number of row indices received does not match." << std::endl
1178  << "numRead = " << numRead << std::endl
1179  << "nnzLocal = " << pspmat.nnzLocal << std::endl;
1180 #ifdef USE_ABORT
1181  abort();
1182 #endif
1183  throw std::logic_error( msg.str().c_str() );
1184  }
1185 
1186  pspmat.rowindLocal.Resize( numRead );
1187  MPI_Recv( pspmat.rowindLocal.Data(), numRead, MPI_INT, 0, 1, comm, &mpistat );
1188  }
1189 
1190 // std::cout << "Proc " << mpirank << " outputs rowindLocal.size() = "
1191 // << pspmat.rowindLocal.m() << endl;
1192 
1193 
1194  // Read and distribute the nonzero values
1195  if( mpirank == 0 ){
1196  Int tmp;
1197  NumVec<Real> buf;
1198  Int numRead;
1199  for( Int ip = 0; ip < mpisize; ip++ ){
1200  numRead = 2* (colptr[ip*numColFirst + numColLocalVec[ip]] -
1201  colptr[ip*numColFirst]);
1202  Real *ptr;
1203  if( ip > 0 ){
1204  buf.Resize(numRead);
1205  ptr = buf.Data();
1206  }
1207  else{
1208  ptr = reinterpret_cast<Real*>(pspmat.nzvalLocal.Data());
1209  }
1210 
1211  //read data in either buf or pspmat.nzvalLocal
1212  for( Int i = 0; i < numRead; i++ ){
1213  fin >> *(ptr++);
1214  }
1215 
1216  if( ip > 0 ){
1217  MPI_Send(&numRead, 1, MPI_INT, ip, 0, comm);
1218  MPI_Send(buf.Data(), numRead, MPI_DOUBLE, ip, 1, comm);
1219  }
1220 // else{
1221 // std::copy( buf.Data(), buf.Data() + numRead, pspmat.nzvalLocal.Data() );
1222 // memcpy( (void*)( pspmat.nzvalLocal.Data() ),
1223 // (void*)buf.Data(), sizeof(Real)*numRead );
1224 // }
1225  }
1226  }
1227  else{
1228  Int numRead;
1229  MPI_Recv(&numRead, 1, MPI_INT, 0, 0, comm, &mpistat);
1230  if( numRead != 2*pspmat.nnzLocal ){
1231  std::ostringstream msg;
1232  msg << "The number of values received does not match." << std::endl
1233  << "numRead = " << numRead << std::endl
1234  << "2*nnzLocal = " << 2*pspmat.nnzLocal << std::endl;
1235 #ifdef USE_ABORT
1236  abort();
1237 #endif
1238  throw std::logic_error( msg.str().c_str() );
1239  }
1240 
1241  pspmat.nzvalLocal.Resize( numRead/2 );
1242  MPI_Recv( reinterpret_cast<Real*>(pspmat.nzvalLocal.Data()),
1243  numRead, MPI_DOUBLE, 0, 1, comm, &mpistat );
1244  }
1245 
1246  // Close the file
1247  if( mpirank == 0 ){
1248  fin.close();
1249  }
1250 
1251 
1252 
1253  MPI_Barrier( comm );
1254 
1255 #ifndef _RELEASE_
1256  PopCallStack();
1257 #endif
1258 
1259  return ;
1260 } // ----- end of function ReadDistSparseMatrixFormatted -----
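// Note on the complex formatted reader above: the value section of the text file
// stores 2*nnz reals, i.e. the real and imaginary parts of each entry interleaved.
// That is why numRead is defined as twice the per-rank nonzero count, receiving ranks
// check it against 2*nnzLocal, and nzvalLocal is resized to numRead/2. For a single
// entry 3.5 + 0.25i the value field would therefore read "3.5 0.25".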
1261 
1262 
1263  template<typename T> inline void GetDiagonal ( const DistSparseMatrix<T>& A, NumVec<T>& diag )
1264 {
1265 #ifndef _RELEASE_
1266  PushCallStack("GetDiagonal");
1267 #endif
1268  Int mpirank, mpisize;
1269  MPI_Comm_rank( A.comm, &mpirank );
1270  MPI_Comm_size( A.comm, &mpisize );
1271 
1272  NumVec<T> diagLocal( A.size );
1273  SetValue( diagLocal, ZERO<T>() );
1274  diag.Resize( A.size );
1275  SetValue( diag, ZERO<T>() );
1276 
1277  Int numColFirst = A.size / mpisize;
1278  Int firstCol = mpirank * numColFirst;
1279  Int numColLocal = A.colptrLocal.m() - 1;
1280 
1281 #if ( _DEBUGlevel_ >= 1 )
1282  statusOFS << "numColFirst = " << numColFirst << std::endl;
1283  statusOFS << "A.nzvalLocal.size = " << A.nzvalLocal.m() << std::endl;
1284  statusOFS << "A.nnzLocal = " << A.nnzLocal << std::endl;
1285 #endif
1286 
1287  // Note that the indices in DistSparseMatrix follow the FORTRAN (1-based) convention
1288  for( Int j = 0; j < numColLocal; j++ ){
1289  Int jcol = j + firstCol + 1;
1290  Int numRow = A.colptrLocal(j+1) - A.colptrLocal(j);
1291  const Int* rowPtr = &A.rowindLocal( A.colptrLocal(j) - 1 );
1292  // NOTE: The rows in DistSparseMatrix are not necessarily ordered.
1293  // So lower_bound cannot be used here for fast searching. find has to be used.
1294  const Int* ptr = find( rowPtr, rowPtr + numRow, jcol );
1295  if( ptr == rowPtr + numRow ){
1296  std::ostringstream msg;
1297  msg << "Serious problem. Did not find the row corresponding to the column." << std::endl
1298  << "This happens when j = " << j << ", jcol = " << jcol << ", and the row indices are " << std::endl
1299  << IntNumVec( numRow, false, const_cast<Int*>(rowPtr) ) << std::endl;
1300  #ifdef USE_ABORT
1301 abort();
1302 #endif
1303 throw std::logic_error( msg.str().c_str() );
1304  }
1305  Int diagIdx = ptr - A.rowindLocal.Data();
1306  diagLocal( jcol - 1 ) = A.nzvalLocal( diagIdx );
1307  }
1308 
1309  mpi::Allreduce( &diagLocal[0], &diag[0], A.size, MPI_SUM, A.comm );
1310 
1311 #ifndef _RELEASE_
1312  PopCallStack();
1313 #endif
1314 
1315  return ;
1316 } // ----- end of function GetDiagonal -----
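// A minimal usage sketch for GetDiagonal (hypothetical driver code). Because the
// routine finishes with an Allreduce over A.comm, every rank receives the full
// diagonal.
inline void ExampleGetDiagonalUsage ( const DistSparseMatrix<Real>& A )
{
  NumVec<Real> diag;
  GetDiagonal( A, diag );
  // diag.m() == A.size on return; entries use the same Real type as A.nzvalLocal.
}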
1317 
1318 
1319 
1320 
1321 template <typename T> void CSCToCSR(const DistSparseMatrix<T>& sparseA, DistSparseMatrix<T> & sparseB ){
1322 
1323 
1324  Int mpirank;
1325  MPI_Comm_rank(sparseA.comm,&mpirank);
1326 
1327  Int mpisize;
1328  MPI_Comm_size(sparseA.comm,&mpisize);
1329 
1330  Int numRowLocalFirst = sparseA.size / mpisize;
1331  Int firstRow = mpirank * numRowLocalFirst;
1332  Int numRowLocal = -1;
1333  Int nnzLocal = -1;
1334 
1335  sparseB.size = sparseA.size;
1336  sparseB.nnz = sparseA.nnz;
1337  sparseB.comm = sparseA.comm;
1338 
1339 
1340  LongInt nnz = 0;
1341  IntNumVec rowindGlobal;
1342  IntNumVec colptrGlobal;
1343  //TIMER_START(ToGlobalStructure);
1344  {
1345  colptrGlobal.Resize(sparseA.size+1);
1346 
1347  /* Allgatherv for row indices. */
1348  IntNumVec prevnz(mpisize);
1349  IntNumVec rcounts(mpisize);
1350  MPI_Allgather(&sparseA.nnzLocal, 1, MPI_INT, rcounts.Data(), 1, MPI_INT, sparseA.comm);
1351 
1352  prevnz[0] = 0;
1353  for (Int i = 0; i < mpisize-1; ++i) { prevnz[i+1] = prevnz[i] + rcounts[i]; }
1354 
1355  nnz = 0;
1356  for (Int i = 0; i < mpisize; ++i) { nnz += rcounts[i]; }
1357  rowindGlobal.Resize(nnz);
1358 
1359  MPI_Allgatherv(sparseA.rowindLocal.Data(), sparseA.nnzLocal, MPI_INT, rowindGlobal.Data(),rcounts.Data(), prevnz.Data(), MPI_INT, sparseA.comm);
1360 
1361  /* Allgatherv for colptr */
1362  // Compute the number of columns on each processor
1363  Int numColFirst = std::max(1,sparseA.size / mpisize);
1364  SetValue( rcounts, numColFirst );
1365  rcounts[mpisize-1] = sparseA.size - numColFirst * (mpisize-1); // Modify the last entry
1366 
1367  IntNumVec rdispls(mpisize);
1368  rdispls[0] = 0;
1369  for (Int i = 0; i < mpisize-1; ++i) { rdispls[i+1] = rdispls[i] + rcounts[i]; }
1370 
1371  MPI_Allgatherv(sparseA.colptrLocal.Data(), sparseA.colptrLocal.m()-1, MPI_INT, colptrGlobal.Data(),
1372  rcounts.Data(), rdispls.Data(), MPI_INT, sparseA.comm);
1373 
1374  /* Recompute column pointers. */
1375  for (Int p = 1; p < mpisize; p++) {
1376  Int idx = rdispls[p];
1377  for (Int j = 0; j < rcounts[p]; ++j) colptrGlobal[idx++] += prevnz[p];
1378  }
1379  colptrGlobal(sparseA.size)= nnz+1;
1380  }
1381  //TIMER_STOP(ToGlobalStructure);
1382 
1383 
1384  IntNumVec rowptrGlobal;
1385  IntNumVec colindGlobal;
1386  //Compute Global CSR structure and Local CSR structure
1387  {
1388  Int i, j;
1389 
1390  colindGlobal.Resize(nnz);
1391  rowptrGlobal.Resize(sparseA.size+1);
1392  IntNumVec row_nnz(sparseA.size);
1393  SetValue(row_nnz,I_ZERO);
1394 
1395  for (i = 0; i < nnz; i++) {
1396  Int k = rowindGlobal[i];
1397  row_nnz[k-1]++;
1398  }
1399 
1400  rowptrGlobal[0]=1;
1401  for (i = 1; i <= sparseA.size; i++)
1402  {
1403  rowptrGlobal[i] = rowptrGlobal[i-1] + row_nnz[i-1];
1404  row_nnz[i-1] = 0;
1405  }
1406 
1407  /*
1408  * convert from CSC to CSR.
1409  * use row_nnz to keep track of the number of non-zeros
1410  * added to each row.
1411  */
1412  for (j = 0; j < colptrGlobal.m()-1; j++)
1413  {
1414  Int k;
1415  Int nnz_col; /* # of non-zeros in column j of A */
1416 
1417  nnz_col = colptrGlobal[j+1] - colptrGlobal[j];
1418 
1419  for (k = colptrGlobal[j]; k < colptrGlobal[j+1]; k++)
1420  {
1421  Int i = rowindGlobal[ k - 1 ]; /* row index */
1422  Int h = rowptrGlobal[i-1] + row_nnz[i-1]; /* non-zero position */
1423 
1424  /* add the non-zero A(i,j) to B */
1425  colindGlobal[ h - 1 ] = j+1;
1426  row_nnz[i-1]++;
1427  }
1428  }
1429 
1430 
1431 
1432  }
1433 
1434  //This assertion is only ok for structurally symmetric matrices
1435  //for(int i=0;i<rowptrGlobal.m();++i){ assert(rowptrGlobal[i]==colptrGlobal[i]); }
1436 
1437  //for(int i=1;i<rowptrGlobal.m();++i){
1438  // Int colbeg = rowptrGlobal[i-1];
1439  // Int colend = rowptrGlobal[i]-1;
1440  // for(Int j=colbeg;j<=colend;++j){
1441  // Int row = colindGlobal[j-1];
1442 
1443  // Int * ptr = std::find(&rowindGlobal[colbeg-1],&rowindGlobal[colend-1]+1,row);
1444  // if(ptr==&rowindGlobal[colend-1]+1){
1445  // abort();
1446  // }
1447  // }
1448  //}
1449 
1450  //compute the local CSR structure
1451  std::vector<Int > numRowVec(mpisize,numRowLocalFirst);
1452  numRowVec[mpisize-1] = sparseA.size - (mpisize-1)*numRowLocalFirst;
1453 
1454  numRowLocal = numRowVec[mpirank];
1455  Int myFirstCol = mpirank*numRowLocalFirst+1;
1456  Int myLastCol = myFirstCol + numRowLocal -1;
1457 
1458  sparseB.colptrLocal.Resize(numRowLocal+1);
1459  Int * rowptrLocal = &sparseB.colptrLocal[0];
1460 
1461  //copy local chunk of rowptr
1462  std::copy(&rowptrGlobal[myFirstCol-1],
1463  &rowptrGlobal[myLastCol]+1,
1464  rowptrLocal);
1465 
1466  nnzLocal = rowptrLocal[numRowLocal] - rowptrLocal[0];
1467 
1468  sparseB.nnzLocal = nnzLocal;
1469 
1470  sparseB.rowindLocal.Resize(nnzLocal);
1471  Int * colindLocal = &sparseB.rowindLocal[0];
1472 
1473  sparseB.nzvalLocal.Resize(nnzLocal);
1474  T * nzvalLocal = &sparseB.nzvalLocal[0];
1475 
1476  //copy local chunk of colind
1477  std::copy(&colindGlobal[rowptrLocal[0]-1],
1478  &colindGlobal[rowptrLocal[0]-1]+nnzLocal,
1479  colindLocal);
1480 
1481  //convert rowptr to local references
1482  for( Int i = 1; i < numRowLocal + 1; i++ ){
1483  rowptrLocal[i] = rowptrLocal[i] - rowptrLocal[0] + 1;
1484  }
1485  rowptrLocal[0]=1;
1486 
1487  //for(int i=0;i<numRowLocal;++i){
1488  // Int col = i+myFirstCol;
1489  // Int colbeg = rowptrLocal[i];
1490  // Int colend = rowptrLocal[i+1]-1;
1491 
1492  // for(Int j=colbeg;j<=colend;++j){
1493  // Int row = colindLocal[j-1];
1494 
1495  // const Int * ptr = std::find(&sparseA.rowindLocal[colbeg-1],&sparseA.rowindLocal[colend-1]+1,row);
1496  // if(ptr==&sparseA.rowindLocal[colend-1]+1){
1497  // abort();
1498  // }
1499  // }
1500 
1501  //}
1502 
1503 
1504  {
1505  //Redistribute the data
1506  //local pointer to head to rows in local CSR structure
1507  std::vector<Int> * row_nnz = new std::vector<Int>(numRowLocal,0);
1508 
1509  for(int p =0; p<mpisize;p++){
1510  std::vector< char > send_buffer;
1511  std::vector< char > recv_buffer;
1512  std::vector<int> displs(mpisize+1,0);
1513  std::vector<int> bufsizes(mpisize,0);
1514 
1515  //parse the Global CSC structure to count
1516  Int firstCol = p*numRowLocalFirst+1;
1517  Int lastCol = firstCol + numRowVec[p]-1;
1518  for(Int col = 1; col<colptrGlobal.m();++col){
1519  if(col>= firstCol && col<= lastCol){
1520  Int colbeg = colptrGlobal[col-1];
1521  Int colend = colptrGlobal[col]-1;
1522  for(Int i=colbeg;i<=colend;++i){
1523  Int row = rowindGlobal[i-1];
1524  //determine where it should be packed ?
1525  Int p_dest = min(mpisize-1,(row-1)/(numRowLocalFirst));
1526  bufsizes[p_dest]+=sizeof(T);
1527  }
1528  }
1529  else if(col>lastCol){
1530  break;
1531  }
1532  }
1533 
1534  //compute displacement (cum sum)
1535  std::partial_sum(bufsizes.begin(),bufsizes.end(),displs.begin()+1);
1536 
1537  if(mpirank==p){
1538  std::vector<int> bufpos(mpisize,0);
1539 
1540  //resize buffers
1541  Int send_buffer_size = displs[mpisize];
1542  send_buffer.resize(displs[mpisize]);
1543 
1544  //fill the buffers by parsing local CSC structure
1545  for(Int j = 1; j<sparseA.colptrLocal.m();++j){
1546  Int gCol = mpirank*numRowLocalFirst + j;
1547  Int colbeg = sparseA.colptrLocal[j-1];
1548  Int colend = sparseA.colptrLocal[j]-1;
1549  for(Int i=colbeg;i<=colend;++i){
1550  Int row = sparseA.rowindLocal[i-1];
1551  //determine where it should be packed ?
1552  Int p_dest = min(mpisize-1,(row-1)/(numRowLocalFirst));
1553  Int p_displs = displs[p_dest];
1554  Int p_bufpos = bufpos[p_dest];
1555  *((T *)&send_buffer.at( displs[p_dest] + bufpos[p_dest] )) = sparseA.nzvalLocal[i-1];
1556  //advance position
1557  bufpos[p_dest]+= sizeof(T);
1558  }
1559  }
1560  }
1561 
1562 
1563 
1564  Int recv_buffer_size = bufsizes[mpirank];
1565  recv_buffer.resize(bufsizes[mpirank]);
1566 
1567  //scatterv
1568  MPI_Scatterv(&send_buffer[0], &bufsizes[0], &displs[0], MPI_BYTE, &recv_buffer[0], bufsizes[mpirank], MPI_BYTE, p, sparseA.comm);
1569 
1570  //process data
1571  Int recv_pos = 0;
1572  //parse the Global CSC structure to count
1573  for(Int col = 1; col<colptrGlobal.m();++col){
1574  if(col>= firstCol && col<= lastCol){
1575  Int colbeg = colptrGlobal[col-1];
1576  Int colend = colptrGlobal[col]-1;
1577  for(Int i=colbeg;i<=colend;++i){
1578  Int row = rowindGlobal[i-1];
1579  Int p_dest = min(mpisize-1,(row-1)/(numRowLocalFirst));
1580  if(p_dest==mpirank){
1581  //compute local CSR coordinates
1582  Int local_row = row - mpirank*numRowLocalFirst;
1583  Int h = rowptrLocal[local_row-1] + row_nnz->at(local_row-1); /* non-zero position */
1584  *((T*)&nzvalLocal[h-1]) = *((T*)&recv_buffer.at(recv_pos));
1585  //advance position in CSR buffer
1586  row_nnz->at(local_row-1)++;
1587  //advance position in CSC recv_buffer
1588  recv_pos+=sizeof(T);
1589  }
1590  }
1591  }
1592  else if(col>lastCol){
1593  break;
1594  }
1595  }
1596  }
1597  delete row_nnz;
1598  }
1599 }
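// A usage sketch for CSCToCSR (hypothetical driver code). Conceptually this builds
// the CSR, i.e. row-wise, pattern and values of A; for a structurally symmetric
// matrix the sparsity pattern is unchanged.
inline void ExampleCSCToCSRUsage ( const DistSparseMatrix<Real>& A )
{
  DistSparseMatrix<Real> B;
  CSCToCSR( A, B );
  // B reuses the CSC member names for the CSR arrays: B.colptrLocal holds the local
  // (1-based) row pointers and B.rowindLocal the column indices, with rows distributed
  // in the same 1D block fashion as the columns of A.
}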
1600 
1601 
1602 
1603 
1604 }
1605 #endif //_PEXSI_UTILITY_IMPL_HPP_