66 struct SuperLUOptions;
// bitMask: a sequence of boolean flags stored as std::vector<bool>.
68 typedef std::vector<bool> bitMask;
// bitMaskSet: ordered map from a bitMask to a list of Int values.
// NOTE(review): presumably groups the indices that share one mask; confirm
// against the code that fills the mask*_ members.
69 typedef std::map<bitMask , std::vector<Int> > bitMaskSet;
95 GridType( MPI_Comm Bcomm,
int nprow,
int npcol );
154 LBlock& operator = (
const LBlock& LB) {
162 friend std::ostream& operator<<(std::ostream& out,
const LBlock& vec)
164 out <<
"(" << vec.blockIdx <<
", " << vec.numRow <<
", " << vec.numCol <<std::endl<<
"rows " << vec.rows <<std::endl<<
"nzval " <<std::endl<< vec.nzval <<
")";
201 UBlock& operator = (
const UBlock& UB) {
210 friend std::ostream& operator<<(std::ostream& out,
const UBlock& vec)
212 out <<
"(" << vec.blockIdx <<
", " << vec.numRow <<
", " << vec.numCol <<std::endl<<
"cols " << vec.cols <<std::endl<<
"nzval " <<std::endl<< vec.nzval <<
")";
228 {
return g->mpirank; }
232 {
return g->mpirank / g->numProcCol; }
236 {
return g->mpirank % g->numProcCol; }
241 {
return bnum % g->numProcRow; }
246 {
return bnum % g->numProcCol; }
251 {
return (i%g->numProcRow) * g->numProcCol + j%g->numProcCol; }
256 {
return bnum / g->numProcRow; }
261 {
return bnum / g->numProcCol; }
266 {
return iLocal * g->numProcRow +
MYROW( g ); }
271 {
return jLocal * g->numProcCol +
MYCOL( g ); }
276 {
return (a%b) ? ( a/b + 1 ) : ( a/b ); }
280 {
return s->superIdx[i]; }
285 {
return s->superPtr[bnum]; }
292 {
return s->superPtr[bnum]; }
297 {
return s->superPtr[bnum+1] - s->superPtr[bnum]; }
301 {
return s->superPtr.m() - 1; }
306 {
return s->superIdx.m(); }
330 namespace LBlockMask{
341 Int
inline serialize(LBlock& val, std::ostream& os,
const std::vector<Int>& mask){
343 if(mask[i]==1) serialize(val.blockIdx, os, mask); i++;
344 if(mask[i]==1) serialize(val.numRow, os, mask); i++;
345 if(mask[i]==1) serialize(val.numCol, os, mask); i++;
346 if(mask[i]==1) serialize(val.rows, os, mask); i++;
347 if(mask[i]==1) serialize(val.nzval, os, mask); i++;
351 Int
inline deserialize(LBlock& val, std::istream& is,
const std::vector<Int>& mask){
353 if(mask[i]==1) deserialize(val.blockIdx, is, mask); i++;
354 if(mask[i]==1) deserialize(val.numRow, is, mask); i++;
355 if(mask[i]==1) deserialize(val.numCol, is, mask); i++;
356 if(mask[i]==1) deserialize(val.rows, is, mask); i++;
357 if(mask[i]==1) deserialize(val.nzval, is, mask); i++;
377 namespace UBlockMask{
388 Int
inline serialize(UBlock& val, std::ostream& os,
const std::vector<Int>& mask){
390 if(mask[i]==1) serialize(val.blockIdx, os, mask); i++;
391 if(mask[i]==1) serialize(val.numRow, os, mask); i++;
392 if(mask[i]==1) serialize(val.numCol, os, mask); i++;
393 if(mask[i]==1) serialize(val.cols, os, mask); i++;
394 if(mask[i]==1) serialize(val.nzval, os, mask); i++;
398 Int
inline deserialize(UBlock& val, std::istream& is,
const std::vector<Int>& mask){
400 if(mask[i]==1) deserialize(val.blockIdx, is, mask); i++;
401 if(mask[i]==1) deserialize(val.numRow, is, mask); i++;
402 if(mask[i]==1) deserialize(val.numCol, is, mask); i++;
403 if(mask[i]==1) deserialize(val.cols, is, mask); i++;
404 if(mask[i]==1) deserialize(val.nzval, is, mask); i++;
// One list of Int per local block column; entry jLocal is exposed by ColBlockIdx(jLocal).
487 std::vector<std::vector<Int> > ColBlockIdx_;
// One list of Int per local block row; entry iLocal is exposed by RowBlockIdx(iLocal).
488 std::vector<std::vector<Int> > RowBlockIdx_;
// L_[jLocal] is the list of LBlock objects of local block column jLocal
// (exposed by L(jLocal); its length is reported by NumBlockL(jLocal)).
489 std::vector<std::vector<LBlock> > L_;
// U_[iLocal] is the list of UBlock objects of local block row iLocal
// (exposed by U(iLocal); its length is reported by NumBlockU(iLocal)).
490 std::vector<std::vector<UBlock> > U_;
// Nested list of Int handed out by reference from WorkingSet().
492 std::vector<std::vector<Int> > workingSet_;
// ---- Bookkeeping for the "send to below" communication ----
// NOTE(review): only the declarations are visible in this part of the file;
// how the containers below are filled and how their entries correspond to
// each other must be confirmed against the communicator construction code.
// Map from a bitMask to a list of Int.
511 bitMaskSet maskSendToBelow_;
// MPI communicators kept by value.
512 std::vector<MPI_Comm> commSendToBelow_;
// Non-owning-looking pointers to MPI_Comm (presumably into commSendToBelow_ -- confirm).
513 std::vector<MPI_Comm*> commSendToBelowPtr_;
// One Int per entry (presumably the root rank within the communicator -- confirm).
514 std::vector<Int> commSendToBelowRoot_;
// Pointers to bitMask objects (presumably into commSendToBelowMask_ or maskSendToBelow_ -- confirm).
515 std::vector<bitMask *> commSendToBelowMaskPtr_;
// bitMask objects kept by value.
516 std::vector<bitMask> commSendToBelowMask_;
// ---- Same set of containers for the "receive from below" communication ----
519 bitMaskSet maskRecvFromBelow_;
520 std::vector<MPI_Comm> commRecvFromBelow_;
521 std::vector<MPI_Comm*> commRecvFromBelowPtr_;
522 std::vector<Int> commRecvFromBelowRoot_;
524 std::vector<bitMask *> commRecvFromBelowMaskPtr_;
525 std::vector<bitMask> commRecvFromBelowMask_;
// ---- Same set of containers for the "send to right" communication ----
529 bitMaskSet maskSendToRight_;
530 std::vector<MPI_Comm> commSendToRight_;
531 std::vector<MPI_Comm*> commSendToRightPtr_;
532 std::vector<Int> commSendToRightRoot_;
533 std::vector<bitMask *> commSendToRightMaskPtr_;
534 std::vector<bitMask> commSendToRightMask_;
542 SELINV_TAG_U_CONTENT,
544 SELINV_TAG_L_CONTENT,
547 SELINV_TAG_D_CONTENT,
555 struct SuperNodeBufferType{
558 std::vector<Int> RowLocalPtr;
559 std::vector<Int> BlockIdxLocal;
560 std::vector<char> SstrLcolSend;
561 std::vector<char> SstrUrowSend;
562 std::vector<char> SstrLcolRecv;
563 std::vector<char> SstrUrowRecv;
564 Int SizeSstrLcolSend;
565 Int SizeSstrUrowSend;
566 Int SizeSstrLcolRecv;
567 Int SizeSstrUrowRecv;
572 SuperNodeBufferType():
580 SuperNodeBufferType(Int &pIndex) :
591 inline void SelInvIntra_Collectives(Int lidx);
594 inline void SelInvIntra_P2p(Int lidx);
598 inline void SelInv_lookup_indexes(SuperNodeBufferType & snode, std::vector<LBlock> & LcolRecv, std::vector<UBlock> & UrowRecv,
NumMat<Scalar> & AinvBuf,
NumMat<Scalar> & UBuf);
601 inline void GetWorkSet(std::vector<Int> & snodeEtree, std::vector<std::vector<Int> > & WSet);
604 inline void UnpackData(SuperNodeBufferType & snode, std::vector<LBlock> & LcolRecv, std::vector<UBlock> & UrowRecv);
607 inline void ComputeDiagUpdate(SuperNodeBufferType & snode);
610 inline void SendRecvCD_UpdateU(std::vector<SuperNodeBufferType> & arrSuperNodes, Int stepSuper);
613 void getMaxCommunicatorSizes();
616 void ConstructCommunicators_Collectives(Int lidx);
620 void DestructCommunicators_Collectives( );
/// @brief Total number of columns of the matrix.
/// @return The length (m()) of super_->superIdx, which holds one entry per column.
643 Int NumCol()
const {
return super_ -> superIdx.m(); }
/// @brief Total number of supernodes.
/// @return The length (m()) of super_->superPtr minus one; superPtr carries one
///         more entry than there are supernodes (see SuperSize, which reads
///         superPtr[bnum+1]).
645 Int NumSuper()
const {
return super_ ->superPtr.m() - 1; }
/// @brief Mutable access to the ColBlockIdx_ entry of one local block column.
/// @param jLocal Local block-column index; used directly as an index into
///               ColBlockIdx_ without a bounds check.
/// @return Reference to ColBlockIdx_[jLocal]; changes made through it modify
///         the stored list.
654 std::vector<Int> & ColBlockIdx(Int jLocal) {
return ColBlockIdx_[jLocal]; }
/// @brief Mutable access to the RowBlockIdx_ entry of one local block row.
/// @param iLocal Local block-row index; used directly as an index into
///               RowBlockIdx_ without a bounds check.
/// @return Reference to RowBlockIdx_[iLocal]; changes made through it modify
///         the stored list.
655 std::vector<Int> & RowBlockIdx(Int iLocal) {
return RowBlockIdx_[iLocal]; }
/// @brief Number of nonzero L blocks stored for a local block column.
/// @param jLocal Local block-column index; indexes L_ without a bounds check.
/// @return L_[jLocal].size(), implicitly converted from the container's
///         unsigned size type to Int.
660 Int
NumBlockL( Int jLocal )
const {
return L_[jLocal].size(); }
/// @brief Number of nonzero U blocks stored for a local block row.
/// @param iLocal Local block-row index; indexes U_ without a bounds check.
/// @return U_[iLocal].size(), implicitly converted from the container's
///         unsigned size type to Int.
664 Int
NumBlockU( Int iLocal )
const {
return U_[iLocal].size(); }
/// @brief Mutable access to the L blocks of one local block column.
/// @param jLocal Local block-column index; indexes L_ without a bounds check.
/// @return Reference to L_[jLocal], the vector of LBlock objects of that column.
675 std::vector<LBlock>&
L( Int jLocal ) {
return L_[jLocal]; }
/// @brief Mutable access to the U blocks of one local block row.
/// @param iLocal Local block-row index; indexes U_ without a bounds check.
/// @return Reference to U_[iLocal], the vector of UBlock objects of that row.
679 std::vector<UBlock>&
U( Int iLocal ) {
return U_[iLocal]; }
/// @brief Mutable access to workingSet_, the nested list of supernodes that
///        can be processed in parallel.
/// @return Reference to the workingSet_ member.
/// @note NOTE(review): the return type is spelled with `int` while workingSet_
///       is declared with `Int`; the reference can only bind if `Int` is an
///       alias of `int` -- confirm, and consider spelling both the same way.
683 std::vector<std::vector<int> >&
WorkingSet( ) {
return workingSet_; }
/// @brief Number of other processor columns flagged in isSendToRight_ for ksup.
///
/// Counts the `true` flags among the grid_->numProcCol entries starting at
/// isSendToRight_.VecData(ksup). If the flag of this process's own processor
/// column (MYCOL(grid_)) is set, it is excluded from the result.
///
/// @param ksup Index selecting the flag column of isSendToRight_
///             (presumably a supernode index -- confirm).
/// @return The count of set flags, minus one when the own flag is set.
687 Int
CountSendToRight(Int ksup) { Int count= std::count (isSendToRight_.VecData(ksup), isSendToRight_.VecData(ksup) + grid_->numProcCol,
true);
return (isSendToRight_(
MYCOL(grid_),ksup)?count-1:count); }
/// @brief Number of other processor rows flagged in isRecvFromBelow_ for ksup.
///
/// Counts the `true` flags among the grid_->numProcRow entries starting at
/// isRecvFromBelow_.VecData(ksup). If the flag of this process's own processor
/// row (MYROW(grid_)) is set, it is excluded from the result.
///
/// @param ksup Index selecting the flag column of isRecvFromBelow_
///             (presumably a supernode index -- confirm).
/// @return The count of set flags, minus one when the own flag is set.
691 Int
CountRecvFromBelow(Int ksup) { Int count= std::count (isRecvFromBelow_.VecData(ksup), isRecvFromBelow_.VecData(ksup) + grid_->numProcRow,
true);
return (isRecvFromBelow_(
MYROW(grid_),ksup)?count-1:count); }
/// @brief Number of cross-diagonal partners flagged in isSendToCrossDiagonal_
///        for ksup.
///
/// Counts the `true` flags among the grid_->numProcCol entries starting at
/// isSendToCrossDiagonal_.VecData(ksup). One is subtracted only when both
/// conditions hold: the flag at this process's own processor column
/// (MYCOL(grid_)) is set, and this process lies in the processor row that
/// PROW(ksup, grid_) reports for ksup.
///
/// @param ksup Index selecting the flag column of isSendToCrossDiagonal_
///             (presumably a supernode index -- confirm).
/// @return The count of set flags, minus one in the case described above.
695 Int
CountSendToCrossDiagonal(Int ksup) { Int count= std::count (isSendToCrossDiagonal_.VecData(ksup), isSendToCrossDiagonal_.VecData(ksup) + grid_->numProcCol,
true);
return ((isSendToCrossDiagonal_(
MYCOL(grid_),ksup) &&
MYROW(grid_)==
PROW(ksup,grid_))?count-1:count); }
/// @brief Number of cross-diagonal partners flagged in
///        isRecvFromCrossDiagonal_ for ksup.
///
/// Counts the `true` flags among the grid_->numProcRow entries starting at
/// isRecvFromCrossDiagonal_.VecData(ksup). One is subtracted only when both
/// conditions hold: the flag at this process's own processor row
/// (MYROW(grid_)) is set, and this process lies in the processor column that
/// PCOL(ksup, grid_) reports for ksup.
///
/// @param ksup Index selecting the flag column of isRecvFromCrossDiagonal_
///             (presumably a supernode index -- confirm).
/// @return The count of set flags, minus one in the case described above.
699 Int
CountRecvFromCrossDiagonal(Int ksup) { Int count= std::count (isRecvFromCrossDiagonal_.VecData(ksup), isRecvFromCrossDiagonal_.VecData(ksup) + grid_->numProcRow,
true);
return ((isRecvFromCrossDiagonal_(
MYROW(grid_),ksup) &&
MYCOL(grid_)==
PCOL(ksup,grid_))?count-1:count); }
706 void GetEtree(std::vector<Int> & etree_supno );
962 #endif // _PSELINV_HPP_
Int NumLocalBlockRow() const
NumLocalBlockRow returns the total number of block rows.
Definition: pselinv.hpp:651
Int numCol
Number of nonzero columns.
Definition: pselinv.hpp:143
void PMatrixToDistSparseMatrix(DistSparseMatrix< Scalar > &A)
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix for...
Definition: pselinv.cpp:4213
void GetDiagonal(NumVec< Scalar > &diag)
GetDiagonal extracts the diagonal elements of the PMatrix.
Definition: pselinv.cpp:4173
void ConstructCommunicationPattern_Collectives()
ConstructCommunicationPattern_Collectives constructs the communication pattern to be used later in th...
Definition: pselinv.cpp:2588
Int CountSendToCrossDiagonal(Int ksup)
CountSendToCrossDiagonal returns the number of cross diagonal processors with which current processor...
Definition: pselinv.hpp:695
A thin interface for passing parameters to set the SuperLU options.
Definition: superlu_dist_interf.hpp:82
std::vector< std::vector< int > > & WorkingSet()
WorkingSet returns the ordered list of supernodes which could be done in parallel.
Definition: pselinv.hpp:683
SuperNodeType describes mapping between supernode and column, the permutation information, and potentially the elimination tree (not implemented here).
Definition: pselinv.hpp:121
Interface with SuperLU_Dist (version 3.0 and later)
NumMat< Scalar > nzval
Dimension numRow * numCol, nonzero elements.
Definition: pselinv.hpp:149
Int PROW(Int bnum, const GridType *g)
PROW returns the processor row that the bnum-th block (supernode) belongs to.
Definition: pselinv.hpp:240
Thin interface to LAPACK.
IntNumVec rows
Dimension numRow * 1, index (0-based) for the number of nonzero rows.
Definition: pselinv.hpp:146
Int numRow
Number of nonzero rows.
Definition: pselinv.hpp:140
const SuperNodeType * SuperNode() const
SuperNode returns the supernodal partition of the current PMatrix.
Definition: pselinv.hpp:671
void SelInv_P2p()
Point-to-point version of the selected inversion.
Definition: pselinv.cpp:3668
NumMat< Scalar > nzval
Dimension numRow * numCol, nonzero elements.
Definition: pselinv.hpp:196
Int MYPROC(const GridType *g)
MYPROC returns the current processor rank.
Definition: pselinv.hpp:227
void PreSelInv()
PreSelInv prepares the structure in L_ and U_ so that SelInv only involves matrix-matrix multiplicati...
Definition: pselinv.cpp:3787
Int PCOL(Int bnum, const GridType *g)
PCOL returns the processor column that the bnum-th block (supernode) belongs to.
Definition: pselinv.hpp:245
Int blockIdx
Block index (supernodal index)
Definition: pselinv.hpp:137
Int LBi(Int bnum, const GridType *g)
LBi returns the local block number on the processor at processor row PROW( bnum, g )...
Definition: pselinv.hpp:255
Int CountRecvFromBelow(Int ksup)
CountRecvFromBelow returns the number of processors below the current processor from which it receive...
Definition: pselinv.hpp:691
Implementation of Numerical Vector.
Int CountRecvFromCrossDiagonal(Int ksup)
CountRecvFromCrossDiagonal returns the number of cross diagonal processors with which current process...
Definition: pselinv.hpp:699
Int SuperSize(Int bnum, const SuperNodeType *s)
SuperSize returns the size of the block bnum.
Definition: pselinv.hpp:296
Int LBj(Int bnum, const GridType *g)
LBj returns the local block number on the processor at processor column PCOL( bnum, g ).
Definition: pselinv.hpp:260
Int GBj(Int jLocal, const GridType *g)
GBj returns the global block number from a local block number in the column direction.
Definition: pselinv.hpp:270
Int NnzLocal()
NnzLocal computes the number of nonzero elements (L and U) saved locally.
Definition: pselinv.cpp:5050
std::vector< UBlock > & U(Int iLocal)
U returns the vector of nonzero U blocks for the local block row iLocal.
Definition: pselinv.hpp:679
Int NumCol(const SuperNodeType *s)
NumCol returns the total number of columns for a supernodal partition.
Definition: pselinv.hpp:305
void PMatrixToDistSparseMatrix2(const DistSparseMatrix< Scalar > &A, DistSparseMatrix< Scalar > &B)
PMatrixToDistSparseMatrix2 is a more efficient version which performs the same job as PMatrixToDistSp...
Definition: pselinv.cpp:4766
void ConstructCommunicationPattern_P2p()
ConstructCommunicationPattern_P2p constructs the communication pattern to be used later in the select...
Definition: pselinv.cpp:2175
Int NumLocalBlockCol() const
NumLocalBlockCol returns the total number of block columns.
Definition: pselinv.hpp:648
Int FirstBlockRow(Int bnum, const SuperNodeType *s)
FirstBlockRow returns the first column of a block bnum. Note: the functionality of FirstBlockRow is e...
Definition: pselinv.hpp:291
LBlock stores a nonzero block in the lower triangular part or the diagonal part in PSelInv...
Definition: pselinv.hpp:134
Int PNUM(Int i, Int j, const GridType *g)
PNUM returns the rank of the processor that owns the block with block row index i and block column index j.
Definition: pselinv.hpp:250
void SelInv_Collectives()
Collective communication version of the selected inversion.
Definition: pselinv.cpp:3698
Int GBi(Int iLocal, const GridType *g)
GBi returns the global block number from a local block number in the row direction.
Definition: pselinv.hpp:265
GridType is the PSelInv way of defining the grid.
Definition: pselinv.hpp:84
Int numCol
Number of nonzero columns.
Definition: pselinv.hpp:190
Interface with MPI to facilitate communication.
IntNumVec cols
Dimension numCol * 1, index (0-based) for the number of nonzero columns.
Definition: pselinv.hpp:193
Int MYCOL(const GridType *g)
MYCOL returns my processor column.
Definition: pselinv.hpp:235
const GridType * Grid() const
Grid returns the GridType structure of the current PMatrix.
Definition: pselinv.hpp:667
Int blockIdx
Block index (supernodal index)
Definition: pselinv.hpp:184
void ConstructCommunicationPattern_Hybrid()
ConstructCommunicationPattern_Hybrid constructs the communication pattern to be used later in the sel...
Definition: pselinv.cpp:3122
Various utility subroutines.
Implementation of numerical matrix.
void SelInv_Hybrid(Int threshold)
Hybrid version of the selected inversion. This file is obsolete.
Definition: pselinv.cpp:3737
UBlock stores a nonzero block in the upper triangular part in PSelInv.
Definition: pselinv.hpp:181
Int numRow
Number of nonzero rows.
Definition: pselinv.hpp:187
Int FirstBlockCol(Int bnum, const SuperNodeType *s)
FirstBlockCol returns the first column of a block bnum.
Definition: pselinv.hpp:284
Int CountSendToRight(Int ksup)
CountSendToRight returns the number of processors to the right of current processor with which it has...
Definition: pselinv.hpp:687
void GetEtree(std::vector< Int > &etree_supno)
GetEtree computes the supernodal elimination tree to be used later in the pipelined selected inversio...
Definition: pselinv.cpp:844
Int NumBlockU(Int iLocal) const
NumBlockU returns the number of nonzero U blocks for the local block row iLocal.
Definition: pselinv.hpp:664
PMatrix contains the main data structure and the computational routine for the parallel selected inve...
Definition: pselinv.hpp:473
Int BlockIdx(Int i, const SuperNodeType *s)
BlockIdx returns the block index of a column i.
Definition: pselinv.hpp:279
Implementation of sparse matrices.
void ConstructCommunicationPattern()
ConstructCommunicationPattern constructs the communication pattern to be used later in the selected i...
Definition: pselinv.cpp:2168
Int CEILING(Int a, Int b)
CEILING is used for computing the storage space for local number of blocks.
Definition: pselinv.hpp:275
void GetNegativeInertia(Real &inertia)
GetNegativeInertia computes the negative inertia of a PMatrix. This can be used to estimate e...
Definition: pselinv.cpp:5097
std::vector< LBlock > & L(Int jLocal)
L returns the vector of nonzero L blocks for the local block column jLocal.
Definition: pselinv.hpp:675
Int NumSuper(const SuperNodeType *s)
NumSuper returns the total number of supernodes.
Definition: pselinv.hpp:300
void SelInv()
SelInv is the main function for the selected inversion.
Definition: pselinv.cpp:3661
LongInt Nnz()
Nnz computes the total number of nonzero elements in the PMatrix.
Definition: pselinv.cpp:5080
DistSparseMatrix describes a Sparse matrix in the compressed sparse column format (CSC) and distribut...
Definition: sparse_matrix_decl.hpp:91
Int NumBlockL(Int jLocal) const
NumBlockL returns the number of nonzero L blocks for the local block column jLocal.
Definition: pselinv.hpp:660
Int MYROW(const GridType *g)
MYROW returns my processor row.
Definition: pselinv.hpp:231