46 #ifndef _PEXSI_PSELINV_HPP_
47 #define _PEXSI_PSELINV_HPP_
64 #include "pexsi/TreeBcast.hpp"
71 #define IDX_TO_TAG(lidx,tag,max) ((SELINV_TAG_COUNT*(lidx%(max+1))+(tag)))
72 #define IDX_TO_TAG2(sidx,lidx,tag) (SELINV_TAG_COUNT*(sidx)+(tag))
73 #define TAG_TO_IDX(tag,typetag) (((tag)-(typetag))/SELINV_TAG_COUNT)
81 enum MSGTYPE {LSIZE=0,LROWSIZE,USIZE,UCOLSIZE,LCONTENT,LROWCONTENT,UCONTENT,UCOLCONTENT,MSGCOUNT};
86 bool operator() (
int i,
int j) {
return ((lookup)(i)<(lookup)(j));}
91 typedef std::vector<bool> bitMask;
92 typedef std::map<bitMask , std::vector<Int> > bitMaskSet;
140 GridType( MPI_Comm Bcomm,
int nprow,
int npcol );
206 LBlock& operator = (
const LBlock& LB) {
214 friend std::ostream& operator<<(std::ostream& out,
const LBlock& vec)
216 out <<
"(" << vec.blockIdx <<
", " << vec.numRow <<
", " << vec.numCol <<std::endl<<
"rows " << vec.rows <<std::endl<<
"nzval " <<std::endl<< vec.nzval <<
")";
256 UBlock& operator = (
const UBlock& UB) {
265 friend std::ostream& operator<<(std::ostream& out,
const UBlock& vec)
267 out <<
"(" << vec.blockIdx <<
", " << vec.numRow <<
", " << vec.numCol <<std::endl<<
"cols " << vec.cols <<std::endl<<
"nzval " <<std::endl<< vec.nzval <<
")";
281 {
return g->mpirank; }
285 {
return g->mpirank / g->numProcCol; }
289 {
return g->mpirank % g->numProcCol; }
294 {
return bnum % g->numProcRow; }
299 {
return bnum % g->numProcCol; }
304 {
return (i%g->numProcRow) * g->numProcCol + j%g->numProcCol; }
309 {
return bnum / g->numProcRow; }
314 {
return bnum / g->numProcCol; }
319 {
return iLocal * g->numProcRow +
MYROW( g ); }
324 {
return jLocal * g->numProcCol +
MYCOL( g ); }
329 {
return (a%b) ? ( a/b + 1 ) : ( a/b ); }
333 {
return s->superIdx[i]; }
338 {
return s->superPtr[bnum]; }
345 {
return s->superPtr[bnum]; }
350 {
return s->superPtr[bnum+1] - s->superPtr[bnum]; }
354 {
return s->superPtr.m() - 1; }
359 {
return s->superIdx.m(); }
383 namespace LBlockMask{
396 Int
inline serialize(LBlock<T>& val, std::ostream& os,
const std::vector<Int>& mask){
397 if(mask[LBlockMask::BLOCKIDX]==1) serialize(val.blockIdx, os, mask);
398 if(mask[LBlockMask::NUMROW ]==1) serialize(val.numRow, os, mask);
399 if(mask[LBlockMask::NUMCOL ]==1) serialize(val.numCol, os, mask);
400 if(mask[LBlockMask::ROWS ]==1) serialize(val.rows, os, mask);
401 if(mask[LBlockMask::NZVAL ]==1) serialize(val.nzval, os, mask);
406 Int
inline deserialize(LBlock<T>& val, std::istream& is,
const std::vector<Int>& mask){
407 if(mask[LBlockMask::BLOCKIDX]==1) deserialize(val.blockIdx, is, mask);
408 if(mask[LBlockMask::NUMROW ]==1) deserialize(val.numRow, is, mask);
409 if(mask[LBlockMask::NUMCOL ]==1) deserialize(val.numCol, is, mask);
410 if(mask[LBlockMask::ROWS ]==1) deserialize(val.rows, is, mask);
411 if(mask[LBlockMask::NZVAL ]==1) deserialize(val.nzval, is, mask);
431 namespace UBlockMask{
443 Int
inline serialize(UBlock<T>& val, std::ostream& os,
const std::vector<Int>& mask){
445 if(mask[i]==1) serialize(val.blockIdx, os, mask); i++;
446 if(mask[i]==1) serialize(val.numRow, os, mask); i++;
447 if(mask[i]==1) serialize(val.numCol, os, mask); i++;
448 if(mask[i]==1) serialize(val.cols, os, mask); i++;
449 if(mask[i]==1) serialize(val.nzval, os, mask); i++;
454 Int
inline deserialize(UBlock<T>& val, std::istream& is,
const std::vector<Int>& mask){
456 if(mask[i]==1) deserialize(val.blockIdx, is, mask); i++;
457 if(mask[i]==1) deserialize(val.numRow, is, mask); i++;
458 if(mask[i]==1) deserialize(val.numCol, is, mask); i++;
459 if(mask[i]==1) deserialize(val.cols, is, mask); i++;
460 if(mask[i]==1) deserialize(val.nzval, is, mask); i++;
536 static PMatrix<T> *
Create(
const GridType * pGridType,
const SuperNodeType * pSuper,
const PSelInvOptions * pSelInvOpt ,
const SuperLUOptions * pLuOpt);
537 static PMatrix<T> *
Create(
const SuperLUOptions * pLuOpt);
544 SELINV_TAG_U_CONTENT,
546 SELINV_TAG_L_CONTENT,
549 SELINV_TAG_D_CONTENT,
551 SELINV_TAG_L_SIZE_CD,
552 SELINV_TAG_L_CONTENT_CD,
563 const GridType* grid_;
565 const SuperNodeType* super_;
567 const PSelInvOptions * options_;
568 const SuperLUOptions * optionsLU_;
573 std::vector<std::vector<Int> > ColBlockIdx_;
574 std::vector<std::vector<Int> > RowBlockIdx_;
575 std::vector<std::vector<LBlock<T> > > L_;
576 std::vector<std::vector<UBlock<T> > > U_;
578 std::vector<std::vector<Int> > workingSet_;
582 BolNumMat isSendToBelow_;
583 BolNumMat isSendToRight_;
584 BolNumVec isSendToDiagonal_;
585 BolNumMat isSendToCrossDiagonal_;
587 BolNumMat isRecvFromBelow_;
588 BolNumVec isRecvFromAbove_;
589 BolNumVec isRecvFromLeft_;
590 BolNumMat isRecvFromCrossDiagonal_;
594 std::vector<TreeBcast2<T> *> fwdToBelowTree2_;
595 std::vector<TreeBcast2<T> *> fwdToRightTree2_;
597 std::vector<TreeBcast *> fwdToBelowTree_;
598 std::vector<TreeBcast *> fwdToRightTree_;
599 std::vector<TreeReduce<T> *> redToLeftTree_;
600 std::vector<TreeReduce<T> *> redToAboveTree_;
607 std::vector<Int> RowLocalPtr;
608 std::vector<Int> BlockIdxLocal;
609 std::vector<char> SstrLcolSend;
610 std::vector<char> SstrUrowSend;
611 std::vector<char> SstrLcolRecv;
612 std::vector<char> SstrUrowRecv;
613 Int SizeSstrLcolSend;
614 Int SizeSstrUrowSend;
615 Int SizeSstrLcolRecv;
616 Int SizeSstrUrowRecv;
650 inline void GetWorkSet(std::vector<Int> & snodeEtree, std::vector<std::vector<Int> > & WSet);
659 inline void SendRecvCD_UpdateU(std::vector<SuperNodeBufferType > & arrSuperNodes, Int stepSuper);
679 Int NumCol()
const {
return super_ -> superIdx.m(); }
681 Int NumSuper()
const {
return super_ ->superPtr.m() - 1; }
690 std::vector< std::vector<Int> > & ColBlockIdx() {
return ColBlockIdx_; }
691 std::vector< std::vector<Int> > & RowBlockIdx() {
return RowBlockIdx_; }
692 std::vector<Int> & ColBlockIdx(Int jLocal) {
return ColBlockIdx_[jLocal]; }
693 std::vector<Int> & RowBlockIdx(Int iLocal) {
return RowBlockIdx_[iLocal]; }
698 Int
NumBlockL( Int jLocal )
const {
return L_[jLocal].size(); }
702 Int
NumBlockU( Int iLocal )
const {
return U_[iLocal].size(); }
713 std::vector<LBlock<T> >&
L( Int jLocal ) {
return L_[jLocal]; }
717 std::vector<UBlock<T> >&
U( Int iLocal ) {
return U_[iLocal]; }
721 std::vector<std::vector<int> >&
WorkingSet( ) {
return workingSet_; }
726 Int
CountSendToRight(Int ksup) { Int count= std::count (isSendToRight_.VecData(ksup), isSendToRight_.VecData(ksup) + grid_->numProcCol,
true);
return (isSendToRight_(
MYCOL(grid_),ksup)?count-1:count); }
730 Int
CountSendToBelow(Int ksup) { Int count= std::count (isSendToBelow_.VecData(ksup), isSendToBelow_.VecData(ksup) + grid_->numProcRow,
true);
return (isSendToBelow_(
MYROW(grid_),ksup)?count-1:count); }
734 Int
CountRecvFromBelow(Int ksup) { Int count= std::count (isRecvFromBelow_.VecData(ksup), isRecvFromBelow_.VecData(ksup) + grid_->numProcRow,
true);
return (isRecvFromBelow_(
MYROW(grid_),ksup)?count-1:count); }
738 Int
CountSendToCrossDiagonal(Int ksup) { Int count= std::count (isSendToCrossDiagonal_.VecData(ksup), isSendToCrossDiagonal_.VecData(ksup) + grid_->numProcCol,
true);
return ((isSendToCrossDiagonal_(
MYCOL(grid_),ksup) &&
MYROW(grid_)==
PROW(ksup,grid_))?count-1:count); }
742 Int
CountRecvFromCrossDiagonal(Int ksup) { Int count= std::count (isRecvFromCrossDiagonal_.VecData(ksup), isRecvFromCrossDiagonal_.VecData(ksup) + grid_->numProcRow,
true);
return ((isRecvFromCrossDiagonal_(
MYROW(grid_),ksup) &&
MYCOL(grid_)==
PCOL(ksup,grid_))?count-1:count); }
749 void GetEtree(std::vector<Int> & etree_supno );
933 void GetColumn ( Int colIdx,
NumVec<T>& col );
988 void CopyLU(
const PMatrix & C);
1000 #endif //_PEXSI_PSELINV_HPP_
virtual void PreSelInv()
PreSelInv prepares the structure in L_ and U_ so that SelInv only involves matrix-matrix multiplicati...
Definition: pselinv_impl.hpp:3711
Int blockIdx
Block index (supernodal index)
Definition: pselinv.hpp:237
void PMatrixToDistSparseMatrix(DistSparseMatrix< T > &A)
PMatrixToDistSparseMatrix converts the PMatrix into a distributed compressed sparse column matrix for...
Definition: pselinv_impl.hpp:4242
A thin interface for passing parameters to set the SuperLU options.
Definition: superlu_dist_internal.hpp:62
std::vector< LBlock< T > > & L(Int jLocal)
L returns the vector of nonzero L blocks for the local block column jLocal.
Definition: pselinv.hpp:713
SuperNodeType describes mapping between supernode and column, the permutation information, and potentially the elimination tree (not implemented here).
Definition: pselinv.hpp:166
static PMatrix< T > * Create(const GridType *pGridType, const SuperNodeType *pSuper, const PSelInvOptions *pSelInvOpt, const SuperLUOptions *pLuOpt)
Create is a factory method which returns a pointer either to a new PMatrix or to a new PMatrixUnsym d...
Definition: pselinv_impl.hpp:5282
Interface with SuperLU_Dist (version 3.0 and later)
void GetNegativeInertia(Real &inertia)
GetNegativeInertia computes the negative inertia of a PMatrix. This can be used to estimate e...
Int PROW(Int bnum, const GridType *g)
PROW returns the processor row that the bnum-th block (supernode) belongs to.
Definition: pselinv.hpp:293
Thin interface to LAPACK.
Int numRow
Number of nonzero rows.
Definition: pselinv.hpp:188
void UnpackData(SuperNodeBufferType &snode, std::vector< LBlock< T > > &LcolRecv, std::vector< UBlock< T > > &UrowRecv)
UnpackData.
Definition: pselinv_impl.hpp:1096
Int maxPipelineDepth
The maximum pipeline depth.
Definition: pselinv.hpp:107
Int MYPROC(const GridType *g)
MYPROC returns the current processor rank.
Definition: pselinv.hpp:280
Int NumLocalBlockCol() const
NumLocalBlockCol returns the total number of block columns.
Definition: pselinv.hpp:684
Int PCOL(Int bnum, const GridType *g)
PCOL returns the processor column that the bnum-th block (supernode) belongs to.
Definition: pselinv.hpp:298
Int LBi(Int bnum, const GridType *g)
LBi returns the local block number on the processor at processor row PROW( bnum, g )...
Definition: pselinv.hpp:308
Int SuperSize(Int bnum, const SuperNodeType *s)
SuperSize returns the size of the block bnum.
Definition: pselinv.hpp:349
Int LBj(Int bnum, const GridType *g)
LBj returns the local block number on the processor at processor column PCOL( bnum, g ).
Definition: pselinv.hpp:313
Int GBj(Int jLocal, const GridType *g)
GBj returns the global block number from a local block number in the column direction.
Definition: pselinv.hpp:323
Int numCol
Number of nonzero columns.
Definition: pselinv.hpp:191
Int NumCol(const SuperNodeType *s)
NumCol returns the total number of columns for a supernodal partiiton.
Definition: pselinv.hpp:358
Definition: pselinv.hpp:604
Int CountRecvFromCrossDiagonal(Int ksup)
CountRecvFromCrossDiagonal returns the number of cross diagonal processors with which current process...
Definition: pselinv.hpp:742
void GetDiagonal(NumVec< T > &diag)
GetDiagonal extracts the diagonal elements of the PMatrix.
Definition: pselinv_impl.hpp:4114
std::vector< std::vector< int > > & WorkingSet()
WorkingSet returns the ordered list of supernodes which could be done in parallel.
Definition: pselinv.hpp:721
Implementation of the parallel SelInv.
void GetWorkSet(std::vector< Int > &snodeEtree, std::vector< std::vector< Int > > &WSet)
GetWorkSet.
Definition: pselinv_impl.hpp:1271
Int FirstBlockRow(Int bnum, const SuperNodeType *s)
FirstBlockRow returns the first column of a block bnum. Note: the functionality of FirstBlockRow is e...
Definition: pselinv.hpp:344
Definition: pselinv.hpp:83
void GetEtree(std::vector< Int > &etree_supno)
GetEtree computes the supernodal elimination tree to be used later in the pipelined selected inversio...
Definition: pselinv_impl.hpp:1202
const GridType * Grid() const
Grid returns the GridType structure of the current PMatrix.
Definition: pselinv.hpp:705
LBlock stores a nonzero block in the lower triangular part or the diagonal part in PSelInv...
Definition: pselinv.hpp:182
Int PNUM(Int i, Int j, const GridType *g)
PNUM returns the processor rank that the bnum-th block (supernode) belongs to.
Definition: pselinv.hpp:303
virtual void ConstructCommunicationPattern()
ConstructCommunicationPattern constructs the communication pattern to be used later in the selected i...
Definition: pselinv_impl.hpp:2670
Sparse matrix and Distributed sparse matrix in compressed column format.
void ConstructCommunicationPattern_P2p()
ConstructCommunicationPattern_P2p constructs the communication pattern to be used later in the select...
Definition: pselinv_impl.hpp:2678
Int GBi(Int iLocal, const GridType *g)
GBi returns the global block number from a local block number in the row direction.
Definition: pselinv.hpp:318
GridType is the PSelInv way of defining the grid.
Definition: pselinv.hpp:129
void SelInvIntra_P2p(Int lidx, Int &rank)
SelInvIntra_P2p.
Definition: pselinv_impl.hpp:1511
Interface with MPI to facilitate communication.
Int MYCOL(const GridType *g)
MYCOL returns my processor column.
Definition: pselinv.hpp:288
Int CountSendToCrossDiagonal(Int ksup)
CountSendToCrossDiagonal returns the number of cross diagonal processors with which current processor...
Definition: pselinv.hpp:738
Int NumLocalBlockRow() const
NumLocalBlockRow returns the total number of block rows.
Definition: pselinv.hpp:687
Various utility subroutines.
const SuperNodeType * SuperNode() const
SuperNode returns the supernodal partition of the current PMatrix.
Definition: pselinv.hpp:709
void PMatrixToDistSparseMatrix_OLD(const DistSparseMatrix< T > &A, DistSparseMatrix< T > &B)
PMatrixToDistSparseMatrix_OLD converts the PMatrix into a distributed compressed sparse column matrix...
Definition: pselinv_impl.hpp:4523
IntNumVec rows
Dimension numRow * 1, index (0-based) for the number of nonzero rows.
Definition: pselinv.hpp:194
Int numCol
Number of nonzero columns.
Definition: pselinv.hpp:243
UBlock stores a nonzero block in the upper triangular part in PSelInv.
Definition: pselinv.hpp:234
std::vector< UBlock< T > > & U(Int iLocal)
U returns the vector of nonzero U blocks for the local block row iLocal.
Definition: pselinv.hpp:717
Int FirstBlockCol(Int bnum, const SuperNodeType *s)
FirstBlockCol returns the first column of a block bnum.
Definition: pselinv.hpp:337
Int NnzLocal()
NnzLocal computes the number of nonzero elements (L and U) saved locally.
Definition: pselinv_impl.hpp:5166
Int blockIdx
Block index (supernodal index)
Definition: pselinv.hpp:185
PMatrix contains the main data structure and the computational routine for the parallel selected inve...
Definition: ngchol_interf.hpp:57
void SendRecvCD_UpdateU(std::vector< SuperNodeBufferType > &arrSuperNodes, Int stepSuper)
SendRecvCD_UpdateU.
Definition: pselinv_impl.hpp:861
Int BlockIdx(Int i, const SuperNodeType *s)
BlockIdx returns the block index of a column i.
Definition: pselinv.hpp:332
Int numRow
Number of nonzero rows.
Definition: pselinv.hpp:240
Int CountSendToRight(Int ksup)
CountSendToRight returns the number of processors to the right of current processor with which it has...
Definition: pselinv.hpp:726
Int CountSendToBelow(Int ksup)
CountSendToBelow returns the number of processors below current processor with which it has to commun...
Definition: pselinv.hpp:730
Int CEILING(Int a, Int b)
CEILING is used for computing the storage space for local number of blocks.
Definition: pselinv.hpp:328
NumMat< T > nzval
Dimension numRow * numCol, nonzero elements.
Definition: pselinv.hpp:249
Int NumSuper(const SuperNodeType *s)
NumSuper returns the total number of supernodes.
Definition: pselinv.hpp:353
void SelInv_P2p()
Point-to-point version of the selected inversion.
Definition: pselinv_impl.hpp:3646
Int CountRecvFromBelow(Int ksup)
CountRecvFromBelow returns the number of processors below the current processor from which it receive...
Definition: pselinv.hpp:734
Int NumBlockL(Int jLocal) const
NumBlockL returns the number of nonzero L blocks for the local block column jLocal.
Definition: pselinv.hpp:698
NumMat< T > nzval
Dimension numRow * numCol, nonzero elements.
Definition: pselinv.hpp:198
A thin interface for passing parameters to set the PSelInv options.
Definition: pselinv.hpp:102
IntNumVec cols
Dimension numCol * 1, index (0-based) for the number of nonzero rows.
Definition: pselinv.hpp:246
Int NumBlockU(Int iLocal) const
NumBlockU returns the number of nonzero U blocks for the local block row iLocal.
Definition: pselinv.hpp:702
virtual void SelInv()
SelInv is the main function for the selected inversion.
Definition: pselinv_impl.hpp:3593
LongInt Nnz()
Nnz computes the total number of nonzero elements in the PMatrix.
Definition: pselinv_impl.hpp:5197
void SelInv_lookup_indexes(SuperNodeBufferType &snode, std::vector< LBlock< T > > &LcolRecv, std::vector< UBlock< T > > &UrowRecv, NumMat< T > &AinvBuf, NumMat< T > &UBuf)
SelInv_lookup_indexes.
Definition: pselinv_impl.hpp:504
void ComputeDiagUpdate(SuperNodeBufferType &snode)
ComputeDiagUpdate.
Definition: pselinv_impl.hpp:1162
DistSparseMatrix describes a Sparse matrix in the compressed sparse column format (CSC) and distribut...
Definition: sparse_matrix.hpp:91
PMatrixUnsym contains the main data structure and the computational routine for the parallel selected...
Definition: pselinv.hpp:991
Int MYROW(const GridType *g)
MYROW returns my processor row.
Definition: pselinv.hpp:284